Skip to content

Commit

Permalink
Merge pull request #27 from zhuzhongshu123/add_code_format
Browse files Browse the repository at this point in the history
Add code format
  • Loading branch information
zhuzhongshu123 authored Nov 5, 2024
2 parents 43af179 + 6ae47c5 commit c7e8745
Show file tree
Hide file tree
Showing 196 changed files with 5,932 additions and 3,147 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/cloud-code-scan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Workflow: runs the layotto/alipay-cloud-devops-codescan action twice on
# pull requests, once with scan_type "stc" and once with scan_type "sca".
name: Alipay Cloud Devops Codescan
on:
# NOTE(review): per the GitHub Actions documentation, pull_request_target runs
# in the context of the base repository, so repository secrets are available
# even for pull requests from forks — confirm this trigger is intended.
pull_request_target:
jobs:
stc:
runs-on: ubuntu-latest
steps:
- name: codeScan
# NOTE(review): the action is referenced by the mutable @main ref while it
# receives secrets below; consider pinning to a reviewed commit SHA.
uses: layotto/alipay-cloud-devops-codescan@main
with:
parent_uid: ${{ secrets.ALI_PID }}
private_key: ${{ secrets.ALI_PK }}
scan_type: stc
# Same action and inputs as the "stc" job; only scan_type differs.
sca:
runs-on: ubuntu-latest
steps:
- name: codeScan
uses: layotto/alipay-cloud-devops-codescan@main
with:
parent_uid: ${{ secrets.ALI_PID }}
private_key: ${{ secrets.ALI_PK }}
scan_type: sca
28 changes: 28 additions & 0 deletions .github/workflows/code-format-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Workflow: installs pre-commit on Python 3.10 and runs every configured hook
# against all files in the repository.
name: Code Format Check

# NOTE(review): both push and pull_request are enabled without branch filters;
# a pull request opened from a branch of this repository will presumably
# trigger the job twice — confirm that is intended.
on:
push:
pull_request:
workflow_dispatch:
repository_dispatch:
# NOTE(review): "my_event" looks like a template placeholder — confirm that
# some caller actually dispatches this event type.
types: [my_event]
jobs:
format-check:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
# Quoted so YAML does not parse the version as the float 3.1.
python-version: '3.10'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pre-commit
# Runs the hooks on the whole tree rather than only on changed files.
- name: Run pre-commit
run: pre-commit run --all-files
25 changes: 25 additions & 0 deletions .github/workflows/license-checker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Workflow: runs the apache/skywalking-eyes license-header check and its
# dependency-license check on pushes and pull requests targeting master.
name: License Checker

on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
check:
name: "License Validation"
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Check License Header
# NOTE(review): both skywalking-eyes steps track the mutable @main ref;
# consider pinning to a release tag or commit SHA for reproducible runs.
uses: apache/skywalking-eyes@main
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
log: info
- name: Check Dependencies' License
uses: apache/skywalking-eyes/dependency@main
28 changes: 28 additions & 0 deletions .github/workflows/pr-title-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Workflow: validates the pull request title with
# amannn/action-semantic-pull-request whenever a PR is opened, edited, or
# synchronized.
name: "Lint PR"

on:
pull_request_target:
types:
- opened
- edited
- synchronize

jobs:
main:
name: Validate PR title
runs-on: ubuntu-latest
steps:
# https://www.conventionalcommits.org/en/v1.0.0/#summary
# NOTE(review): no explicit `permissions:` block is set, so GITHUB_TOKEN uses
# the repository default; presumably read access to pull requests is all
# this job needs — confirm and restrict if so.
- uses: amannn/action-semantic-pull-request@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
requireScope: true
# Negative lookahead: the subject must not begin with an uppercase A-Z letter.
subjectPattern: ^(?![A-Z]).+$
# If `subjectPattern` is configured, you can use this property to override
# the default error message that is shown when the pattern doesn't match.
# The variables `subject` and `title` can be used within the message.
subjectPatternError: |
The subject "{subject}" found in the pull request title "{title}"
didn't match the configured pattern. Please ensure that the subject
doesn't start with an uppercase character.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# pre-commit configuration: runs black and flake8, each pinned to a fixed
# release via `rev`.
repos:
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
# Only files ending in .py whose path starts with kag/ are checked.
files: ^kag/.*\.py$
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
# Same path filter as the black hook above.
files: ^kag/.*\.py$

1 change: 0 additions & 1 deletion kag/builder/component/aligner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

4 changes: 2 additions & 2 deletions kag/builder/component/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def _init_llm(self) -> LLMClient:
try:
config = ProjectClient().get_config(project_id)
llm_config.update(config.get("llm", {}))
except:
except Exception as e:
logging.warning(
f"Failed to get project config for project id: {project_id}"
f"Failed to get project config for project id: {project_id}, info: {e}"
)
llm = LLMClient.from_config(llm_config)
return llm
Expand Down
49 changes: 36 additions & 13 deletions kag/builder/component/extractor/kag_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,16 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
self.llm = self._init_llm()
self.prompt_config = self.config.get("prompt", {})
self.biz_scene = self.prompt_config.get("biz_scene") or os.getenv("KAG_PROMPT_BIZ_SCENE", "default")
self.language = self.prompt_config.get("language") or os.getenv("KAG_PROMPT_LANGUAGE", "en")
self.biz_scene = self.prompt_config.get("biz_scene") or os.getenv(
"KAG_PROMPT_BIZ_SCENE", "default"
)
self.language = self.prompt_config.get("language") or os.getenv(
"KAG_PROMPT_LANGUAGE", "en"
)
self.schema = SchemaClient(project_id=self.project_id).load()
self.ner_prompt = PromptOp.load(self.biz_scene, "ner")(language=self.language, project_id=self.project_id)
self.ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
)
self.std_prompt = PromptOp.load(self.biz_scene, "std")(language=self.language)
self.triple_prompt = PromptOp.load(self.biz_scene, "triple")(
language=self.language
Expand All @@ -60,7 +66,9 @@ def __init__(self, **kwargs):
self.kg_types.append(type_name)
break
if self.kg_types:
self.kg_prompt = SPG_KGPrompt(self.kg_types, language=self.language, project_id=self.project_id)
self.kg_prompt = SPG_KGPrompt(
self.kg_types, language=self.language, project_id=self.project_id
)

@property
def input_types(self) -> Type[Input]:
Expand Down Expand Up @@ -130,17 +138,26 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
continue
if prop_name in spg_type.properties:
from knext.schema.model.property import Property

prop: Property = spg_type.properties.get(prop_name)
o_label = prop.object_type_name_en
if o_label not in BASIC_TYPES:
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_edge(s_id=s_name, s_label=s_label, p=prop_name, o_id=o_name, o_label=o_label)
sub_graph.add_edge(
s_id=s_name,
s_label=s_label,
p=prop_name,
o_id=o_name,
o_label=o_label,
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(id=s_name, name=s_name, label=s_label, properties=properties)
sub_graph.add_node(
id=s_name, name=s_name, label=s_label, properties=properties
)
return sub_graph, entities

@staticmethod
Expand Down Expand Up @@ -174,10 +191,9 @@ def get_category(entities_data, entity_name):
if o_category is None:
o_category = OTHER_TYPE
sub_graph.add_node(tri[2], tri[2], o_category)

sub_graph.add_edge(
tri[0], s_category, to_camel_case(tri[1]), tri[2], o_category
)
edge_type = to_camel_case(tri[1])
if edge_type:
sub_graph.add_edge(tri[0], s_category, edge_type, tri[2], o_category)

return sub_graph

Expand All @@ -199,14 +215,18 @@ def assemble_sub_graph_with_chunk(sub_graph: SubGraph, chunk: Chunk):
"id": chunk.id,
"name": chunk.name,
"content": f"{chunk.name}\n{chunk.content}",
**chunk.kwargs
**chunk.kwargs,
},
)
sub_graph.id = chunk.id
return sub_graph

def assemble_sub_graph(
self, sub_graph: SubGraph, chunk: Chunk, entities: List[Dict], triples: List[list]
self,
sub_graph: SubGraph,
chunk: Chunk,
entities: List[Dict],
triples: List[list],
):
"""
Integrates entity and triple information into a subgraph, and associates it with a chunk of text.
Expand Down Expand Up @@ -311,7 +331,10 @@ def invoke(self, input: Input, **kwargs) -> List[Output]:
try:
entities = self.named_entity_recognition(passage)
sub_graph, entities = self.assemble_sub_graph_with_spg_records(entities)
filtered_entities = [{k: v for k, v in ent.items() if k in ["entity", "category"]} for ent in entities]
filtered_entities = [
{k: v for k, v in ent.items() if k in ["entity", "category"]}
for ent in entities
]
triples = self.triples_extraction(passage, filtered_entities)
std_entities = self.named_entity_standardization(passage, filtered_entities)
self.append_official_name(entities, std_entities)
Expand Down
22 changes: 18 additions & 4 deletions kag/builder/component/extractor/spg_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def __init__(self, **kwargs):
self.spg_ner_types.append(type_name)
continue
self.kag_ner_types.append(type_name)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(language=self.language, project_id=self.project_id)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
)
self.spg_ner_prompt = SPG_KGPrompt(self.spg_ner_types, self.language)

@retry(stop=stop_after_attempt(3))
Expand Down Expand Up @@ -72,17 +74,26 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
continue
if prop_name in spg_type.properties:
from knext.schema.model.property import Property

prop: Property = spg_type.properties.get(prop_name)
o_label = prop.object_type_name_en
if o_label not in BASIC_TYPES:
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_edge(s_id=s_name, s_label=s_label, p=prop_name, o_id=o_name, o_label=o_label)
sub_graph.add_edge(
s_id=s_name,
s_label=s_label,
p=prop_name,
o_id=o_name,
o_label=o_label,
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(id=s_name, name=s_name, label=s_label, properties=properties)
sub_graph.add_node(
id=s_name, name=s_name, label=s_label, properties=properties
)
return sub_graph, entities

def invoke(self, input: Input, **kwargs) -> List[Output]:
Expand All @@ -102,7 +113,10 @@ def invoke(self, input: Input, **kwargs) -> List[Output]:
try:
entities = self.named_entity_recognition(passage)
sub_graph, entities = self.assemble_sub_graph_with_spg_records(entities)
filtered_entities = [{k: v for k, v in ent.items() if k in ["entity", "category"]} for ent in entities]
filtered_entities = [
{k: v for k, v in ent.items() if k in ["entity", "category"]}
for ent in entities
]
triples = self.triples_extraction(passage, filtered_entities)
std_entities = self.named_entity_standardization(passage, filtered_entities)
self.append_official_name(entities, std_entities)
Expand Down
2 changes: 1 addition & 1 deletion kag/builder/component/mapping/relation_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
subject_name: SPGTypeName,
predicate_name: RelationName,
object_name: SPGTypeName,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
schema = SchemaClient(project_id=self.project_id).load()
Expand Down
4 changes: 3 additions & 1 deletion kag/builder/component/mapping/spg_type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ class SPGTypeMapping(MappingABC):
fuse_op (FuseOpABC, optional): The user-defined fuse operator. Defaults to None.
"""

def __init__(self, spg_type_name: SPGTypeName, fuse_func: FuseFunc = None, **kwargs):
def __init__(
self, spg_type_name: SPGTypeName, fuse_func: FuseFunc = None, **kwargs
):
super().__init__(**kwargs)
self.schema = SchemaClient(project_id=self.project_id).load()
assert (
Expand Down
21 changes: 17 additions & 4 deletions kag/builder/component/mapping/spo_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@


class SPOMapping(MappingABC):

def __init__(self):
super().__init__()
self.s_type_col = None
Expand All @@ -39,7 +38,14 @@ def input_types(self) -> Type[Input]:
def output_types(self) -> Type[Output]:
return SubGraph

def add_field_mappings(self, s_id_col: str, p_type_col: str, o_id_col: str, s_type_col: str = None, o_type_col: str = None):
def add_field_mappings(
self,
s_id_col: str,
p_type_col: str,
o_id_col: str,
s_type_col: str = None,
o_type_col: str = None,
):
self.s_type_col = s_type_col
self.s_id_col = s_id_col
self.p_type_col = p_type_col
Expand Down Expand Up @@ -86,14 +92,21 @@ def assemble_sub_graph(self, record: Dict[str, str]):
sub_graph.add_node(id=o_id, name=o_id, label=o_type)
sub_properties = {}
if self.sub_property_col:
sub_properties = json.loads(record.get(self.sub_property_col, '{}'))
sub_properties = json.loads(record.get(self.sub_property_col, "{}"))
sub_properties = {k: str(v) for k, v in sub_properties.items()}
else:
for target_name, source_names in self.sub_property_mapping.items():
for source_name in source_names:
value = record.get(source_name)
sub_properties[target_name] = value
sub_graph.add_edge(s_id=s_id, s_label=s_type, p=p, o_id=o_id, o_label=o_type, properties=sub_properties)
sub_graph.add_edge(
s_id=s_id,
s_label=s_type,
p=p,
o_id=o_id,
o_label=o_type,
properties=sub_properties,
)
return sub_graph

def invoke(self, input: Input, **kwargs) -> List[Output]:
Expand Down
6 changes: 5 additions & 1 deletion kag/builder/component/reader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
from kag.builder.component.reader.markdown_reader import MarkDownReader
from kag.builder.component.reader.docx_reader import DocxReader
from kag.builder.component.reader.txt_reader import TXTReader
from kag.builder.component.reader.dataset_reader import HotpotqaCorpusReader, TwowikiCorpusReader, MusiqueCorpusReader
from kag.builder.component.reader.dataset_reader import (
HotpotqaCorpusReader,
TwowikiCorpusReader,
MusiqueCorpusReader,
)
from kag.builder.component.reader.yuque_reader import YuqueReader

__all__ = [
Expand Down
Loading

0 comments on commit c7e8745

Please sign in to comment.