Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

知识库导入网页数据支持设置更新频率 #751

Merged
merged 1 commit into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/BasisModule/Platform/KnowledgeBase/knowledgebase.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,17 @@ knowledge.delete_knowledge_base("da51a988-cbe7-4b24-aa5b-768985e8xxxx")
`DocumentSource`类定义如下:

```python
class DocumentSourceUrlConfig(BaseModel):
    # Settings attached to one URL of a web document source.
    # `frequency` is required (no default). Per its description the supported
    # values are -1 (no automatic update), 1 (daily), 3, 7 and 30 (every N days).
    frequency: int = Field(
        default=...,
        description="更新频率,目前支持的更新频率为-1(不自动更新),1(每天),3(每3天),7(每7天),30(每30天)。",
    )

class DocumentSource(BaseModel):
    # Describes where the documents to import come from.
    # NOTE(review): `urls` and `urlDepth` are annotated as non-optional but
    # default to None — consider Optional[...]; confirm against callers first.

    # Source kind (required); the `enum` keyword lists "bos" and "web".
    type: str = Field(default=..., description="数据来源类型", enum=["bos", "web"])
    # Document URLs.
    urls: list[str] = Field(default=None, description="文档URL")
    # URL drill-down depth; per the description, 1 means no drill-down.
    urlDepth: int = Field(default=None, description="url下钻深度,1时不下钻")
    # Optional per-URL settings; per the description, the list length should
    # match the length of the source/urls fields.
    urlConfigs: Optional[list[DocumentSourceUrlConfig]] = Field(
        default=None,
        description="该字段的长度需要和source、urls字段长度保持一致。",
    )
```

`DocumentProcessOption`类及衍生类定义如下:
Expand Down Expand Up @@ -364,6 +371,7 @@ knowledge.create_documents(
type="web",
urls=["https://baijiahao.baidu.com/s?id=1802527379394162441"],
urlDepth=1,
urlConfigs=[appbuilder.DocumentSourceUrlConfig(frequency=1)]
),
processOption=appbuilder.DocumentProcessOption(
template="custom",
Expand Down
11 changes: 8 additions & 3 deletions go/appbuilder/knowledge_base_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,15 @@ type GetKnowledgeBaseListResponse struct {
MaxKeys int `json:"maxKeys"`
}

// DocumentsSourceUrlConfig is one entry of DocumentsSource.UrlConfigs.
type DocumentsSourceUrlConfig struct {
	// Frequency is serialized as "frequency" (no omitempty, so 0 is emitted).
	// NOTE(review): presumably the same update-frequency values the Python SDK
	// documents (-1, 1, 3, 7, 30) — confirm against the service API.
	Frequency int `json:"frequency"`
}

type DocumentsSource struct {
Type string `json:"type"`
Urls []string `json:"urls,omitempty"`
UrlDepth int `json:"url_depth,omitempty"`
Type string `json:"type"`
Urls []string `json:"urls,omitempty"`
UrlDepth int `json:"url_depth,omitempty"`
UrlConfigs *[]DocumentsSourceUrlConfig `json:"url_configs,omitempty"`
}

type DocumentsProcessOptionParser struct {
Expand Down
5 changes: 5 additions & 0 deletions go/appbuilder/knowledge_base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,11 @@ func TestCreateKnowledgeBase(t *testing.T) {
Type: "web",
Urls: []string{"https://baijiahao.baidu.com/s?id=1802527379394162441"},
UrlDepth: 1,
UrlConfigs: &[]DocumentsSourceUrlConfig{
{
Frequency: 1,
},
},
},
ProcessOption: &DocumentsProcessOption{
Template: "custom",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,21 @@ public static class Source {
private String type;
private String[] urls;
private Integer urlDepth;
private UrlConfig[] urlConfigs;

/**
 * Creates a source without per-URL configs.
 *
 * @param type     source type
 * @param urls     document URLs
 * @param urlDepth URL drill-down depth
 */
public Source(String type, String[] urls, Integer urlDepth) {
    // Delegate to the full constructor; urlConfigs stays null, as before.
    this(type, urls, urlDepth, (UrlConfig[]) null);
}

/**
 * Creates a source with per-URL configs.
 *
 * @param type       source type
 * @param urls       document URLs
 * @param urlDepth   URL drill-down depth
 * @param urlConfigs per-URL configs; stored as given (no copy is made)
 */
public Source(String type, String[] urls, Integer urlDepth, UrlConfig[] urlConfigs) {
    this.type = type;
    this.urls = urls;
    this.urlDepth = urlDepth;
    this.urlConfigs = urlConfigs;
}

/** Returns the source type. */
public String getType() {
    return type;
}
Expand All @@ -55,6 +63,30 @@ public String[] getUrls() {
/** Returns the URL drill-down depth, or null if it was never set. */
public Integer getUrlDepth() {
    return urlDepth;
}

/** Returns the per-URL configs (the stored array itself, not a copy); may be null. */
public UrlConfig[] getUrlConfigs() {
    return urlConfigs;
}

/**
 * Replaces the per-URL configs.
 *
 * @param urlConfigs new configs; stored as given (no copy is made)
 */
public void setUrlConfigs(UrlConfig[] urlConfigs) {
    this.urlConfigs = urlConfigs;
}

/** Settings attached to a single URL of a {@code Source}. */
public static class UrlConfig {
    /** Update frequency for the URL. */
    private Integer frequency;

    /**
     * @param frequency update frequency to store
     */
    public UrlConfig(Integer frequency) {
        this.frequency = frequency;
    }

    /** Returns the stored update frequency. */
    public Integer getFrequency() {
        return this.frequency;
    }

    /**
     * @param frequency new update frequency
     */
    public void setFrequency(Integer frequency) {
        this.frequency = frequency;
    }
}
}

public static class ProcessOption {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,9 @@ public void testCreateKnowledgebase() throws IOException, AppBuilderServerExcept
knowledgebase.modifyKnowledgeBase(modifyRequest);

// 导入知识库
DocumentsCreateRequest.Source.UrlConfig[] urlConfigs = {new DocumentsCreateRequest.Source.UrlConfig(1)};
DocumentsCreateRequest.Source source = new DocumentsCreateRequest.Source("web",
new String[] {"https://baijiahao.baidu.com/s?id=1802527379394162441"}, 1);
new String[] {"https://baijiahao.baidu.com/s?id=1802527379394162441"}, 1, urlConfigs);
DocumentsCreateRequest.ProcessOption.Parser parser =
new DocumentsCreateRequest.ProcessOption.Parser(
new String[] {"layoutAnalysis", "ocr"});
Expand Down
3 changes: 2 additions & 1 deletion python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def get_default_header():
from appbuilder.core.console.appbuilder_client.appbuilder_client import get_app_list, get_all_apps, describe_apps
from appbuilder.core.console.component_client.component_client import ComponentClient
from appbuilder.core.console.knowledge_base.knowledge_base import KnowledgeBase
from appbuilder.core.console.knowledge_base.data_class import CustomProcessRule, DocumentSource, DocumentChoices, DocumentChunker, DocumentSeparator, DocumentPattern, DocumentProcessOption
from appbuilder.core.console.knowledge_base.data_class import CustomProcessRule, DocumentSource, DocumentChoices, DocumentChunker, DocumentSeparator, DocumentPattern, DocumentProcessOption, DocumentSourceUrlConfig

from .core._exception import (
BadRequestException,
Expand Down Expand Up @@ -234,6 +234,7 @@ def get_default_header():
"DocumentSeparator",
"DocumentPattern",
"DocumentProcessOption",
"DocumentSourceUrlConfig",
"assistant",
"StreamRunContext",
"AssistantEventHandler",
Expand Down
1 change: 1 addition & 0 deletions python/core/console/knowledge_base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .data_class import (
CustomProcessRule,
DocumentSource,
DocumentSourceUrlConfig,
DocumentProcessOption,
DocumentChoices,
DocumentSeparator,
Expand Down
8 changes: 8 additions & 0 deletions python/core/console/knowledge_base/data_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,18 @@ class KnowledgeBaseGetListResponse(BaseModel):
isTruncated: bool = Field(..., description="是否有更多结果")


class DocumentSourceUrlConfig(BaseModel):
    # Settings attached to one URL of a web document source.
    # `frequency` is required (no default). Per its description the supported
    # values are -1 (no automatic update), 1 (daily), 3, 7 and 30 (every N days).
    frequency: int = Field(
        default=...,
        description="更新频率,目前支持的更新频率为-1(不自动更新),1(每天),3(每3天),7(每7天),30(每30天)。",
    )


class DocumentSource(BaseModel):
    # Describes where the documents to import come from.
    # NOTE(review): `urls` and `urlDepth` are annotated as non-optional but
    # default to None — consider Optional[...]; confirm against callers first.

    # Source kind (required); the `enum` keyword lists "bos" and "web".
    type: str = Field(default=..., description="数据来源类型", enum=["bos", "web"])
    # Document URLs.
    urls: list[str] = Field(default=None, description="文档URL")
    # URL drill-down depth; per the description, 1 means no drill-down.
    urlDepth: int = Field(default=None, description="url下钻深度,1时不下钻")
    # Optional per-URL settings; per the description, the list length should
    # match the length of the source/urls fields.
    urlConfigs: Optional[list[DocumentSourceUrlConfig]] = Field(
        default=None,
        description="该字段的长度需要和source、urls字段长度保持一致。",
    )


class DocumentChoices(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions python/tests/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def test_create_knowledge_base(self):
type="web",
urls=["https://baijiahao.baidu.com/s?id=1802527379394162441"],
urlDepth=1,
urlConfigs=[appbuilder.DocumentSourceUrlConfig(frequency=1)]
),
processOption=appbuilder.DocumentProcessOption(
template="custom",
Expand Down
Loading