Skip to content

Commit

Permalink
【RAG】文档上传支持图表解析、表格深度解析 && 切片接口增加知识库ID参数
Browse files Browse the repository at this point in the history
  • Loading branch information
userpj committed Jan 2, 2025
1 parent c2e7a38 commit bb03e0d
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 19 deletions.
2 changes: 1 addition & 1 deletion docs/BasisModule/Platform/KnowledgeBase/knowledgebase.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ class DocumentProcessOption(BaseModel):
description="模板类型,ppt:模版配置—ppt幻灯片, resume:模版配置—简历文档, paper:模版配置—论文文档, custom:自定义配置—自定义切片, default:自定义配置—默认切分",
enum=["ppt", "paper", "qaPair", "resume", " custom", "default"],
)
parser: Optional[DocumentChoices] = Field(None, description="解析方法(文字提取默认启动,参数不体现,layoutAnalysis版面分析,ocr按需增加)")
parser: Optional[DocumentChoices] = Field(None, description="解析方法(文字提取默认启动,参数不体现,layoutAnalysis版面分析,ocr光学字符识别,pageImageAnalysis文档图片解析,chartAnalysis图表解析,tableAnalysis表格深度解析,按需增加)")
knowledgeAugmentation: Optional[DocumentChoices] = Field(
None, description="知识增强,faq、spokenQuery、spo、shortSummary按需增加。问题生成:faq、spokenQuery,段落摘要:shortSummary,三元组知识抽取:spo"
)
Expand Down
33 changes: 19 additions & 14 deletions go/appbuilder/knowledge_base_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,29 +190,33 @@ type UploadDocumentsResponse struct {
}

// CreateChunkRequest is the JSON request body used to create a chunk.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff hunk, which is why DocumentID, Content and ClientToken are
// listed twice. The later group (starting at KnowledgeBaseID) looks like the
// post-change definition — confirm against the real file.
type CreateChunkRequest struct {
DocumentID string `json:"documentId"`
Content string `json:"content"`
ClientToken string `json:"client_token,omitempty"`
// KnowledgeBaseID has no omitempty, so it is always serialized, even when empty.
KnowledgeBaseID string `json:"knowledgeBaseId"`
// DocumentID is serialized as "documentId".
DocumentID string `json:"documentId"`
// Content is serialized as "content".
Content string `json:"content"`
// ClientToken is dropped from the JSON output when it is the empty string.
ClientToken string `json:"client_token,omitempty"`
}

// CreateChunkResponse is the JSON response decoded after a chunk is created.
type CreateChunkResponse struct {
// ID is read from the "id" key of the response.
ID string `json:"id"`
}

// ModifyChunkRequest is the JSON request body used to modify a chunk.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff hunk, which is why ChunkID, Content, Enable and ClientToken
// are listed twice. The later group (starting at KnowledgeBaseID) looks like
// the post-change definition — confirm against the real file.
type ModifyChunkRequest struct {
ChunkID string `json:"chunkId"`
Content string `json:"content"`
Enable bool `json:"enable"`
ClientToken string `json:"client_token,omitempty"`
// KnowledgeBaseID has no omitempty, so it is always serialized, even when empty.
KnowledgeBaseID string `json:"knowledgeBaseId"`
// ChunkID is serialized as "chunkId".
ChunkID string `json:"chunkId"`
// Content is serialized as "content".
Content string `json:"content"`
// Enable has no omitempty, so false is sent explicitly.
Enable bool `json:"enable"`
// ClientToken is dropped from the JSON output when it is the empty string.
ClientToken string `json:"client_token,omitempty"`
}

// DeleteChunkRequest is the JSON request body used to delete a chunk.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff hunk, which is why ChunkID and ClientToken are listed twice.
// The later group (starting at KnowledgeBaseID) looks like the post-change
// definition — confirm against the real file.
type DeleteChunkRequest struct {
ChunkID string `json:"chunkId"`
ClientToken string `json:"client_token,omitempty"`
// KnowledgeBaseID has no omitempty, so it is always serialized, even when empty.
KnowledgeBaseID string `json:"knowledgeBaseId"`
// ChunkID is serialized as "chunkId".
ChunkID string `json:"chunkId"`
// ClientToken is dropped from the JSON output when it is the empty string.
ClientToken string `json:"client_token,omitempty"`
}

// DescribeChunkRequest is the JSON request body used to fetch a single chunk.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff hunk, which is why ChunkID is listed twice. The later group
// (starting at KnowledgeBaseID) looks like the post-change definition —
// confirm against the real file.
type DescribeChunkRequest struct {
ChunkID string `json:"chunkId"`
// KnowledgeBaseID has no omitempty, so it is always serialized, even when empty.
KnowledgeBaseID string `json:"knowledgeBaseId"`
// ChunkID is serialized as "chunkId".
ChunkID string `json:"chunkId"`
}

type DescribeChunkResponse struct {
Expand All @@ -232,10 +236,11 @@ type DescribeChunkResponse struct {
}

// DescribeChunksRequest is the JSON request body used to list chunks.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff hunk, which is why DocumnetID, Marker, MaxKeys and Type are
// listed twice. The later group (starting at KnowledgeBaseID) looks like the
// post-change definition — confirm against the real file.
type DescribeChunksRequest struct {
DocumnetID string `json:"documentId"`
Marker string `json:"marker,omitempty"`
MaxKeys int `json:"maxKeys,omitempty"`
Type string `json:"type,omitempty"`
// KnowledgeBaseID has no omitempty, so it is always serialized, even when empty.
KnowledgeBaseID string `json:"knowledgeBaseId"`
// DocumnetID is spelled this way in the Go API ("Documnet"); the JSON key is
// the correctly spelled "documentId". Renaming the exported field would
// break existing callers.
DocumnetID string `json:"documentId"`
// Marker, MaxKeys and Type are omitted from the JSON output when they hold
// their zero value ("" or 0).
Marker string `json:"marker,omitempty"`
MaxKeys int `json:"maxKeys,omitempty"`
Type string `json:"type,omitempty"`
}

type DescribeChunksResponse struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import com.baidubce.appbuilder.model.knowledgebase.*;

public class Knowledgebase extends Component {
// Knowledge base ID attached to chunk requests when non-empty. It defaults to
// the empty string rather than null because the chunk methods test it with
// this.knowledgeBaseId.isEmpty(); a null value would throw a
// NullPointerException for instances built by the constructors that do not
// take a knowledge base ID.
private String knowledgeBaseId = "";

public Knowledgebase() {
super();
}
Expand All @@ -25,6 +27,11 @@ public Knowledgebase(String SecretKey) {
super(SecretKey);
}

/**
 * Creates a Knowledgebase client that attaches a knowledge base ID to its
 * chunk requests.
 *
 * @param knowledgeBaseId knowledge base ID to send with chunk requests; null
 *                        is treated the same as the empty string (no ID sent)
 * @param SecretKey       secret key forwarded to the parent constructor
 */
public Knowledgebase(String knowledgeBaseId, String SecretKey) {
    super(SecretKey);
    // The chunk methods call this.knowledgeBaseId.isEmpty(), so a null
    // argument must not be stored as-is or those calls would throw a
    // NullPointerException.
    this.knowledgeBaseId = knowledgeBaseId == null ? "" : knowledgeBaseId;
}

/**
* 上传文档
*
Expand Down Expand Up @@ -537,8 +544,14 @@ public String createChunk(String documentId, String content, String clientToken)
private String innerCreateChunk(String documentId, String content, String clientToken)
throws IOException, AppBuilderServerException {
String url = AppBuilderConfig.CHUNK_CREATE_URL;

ChunkCreateRequest request = new ChunkCreateRequest(documentId, content);

ChunkCreateRequest request;
if(this.knowledgeBaseId.isEmpty()) {
request = new ChunkCreateRequest(documentId, content);
} else {
request = new ChunkCreateRequest(this.knowledgeBaseId, documentId, content);
}

String jsonBody = JsonUtils.serialize(request);
url = url + "&clientToken=" + clientToken;
ClassicHttpRequest postRequest = httpClient.createPostRequestV2(url,
Expand Down Expand Up @@ -592,7 +605,12 @@ private void innerModifyChunk(String chunkId, String content, boolean enable, St
throws IOException, AppBuilderServerException {
String url = AppBuilderConfig.CHUNK_MODIFY_URL;

ChunkModifyRequest request = new ChunkModifyRequest(chunkId, content, enable);
ChunkModifyRequest request;
if (this.knowledgeBaseId.isEmpty()) {
request = new ChunkModifyRequest(chunkId, content, enable);
} else {
request = new ChunkModifyRequest(this.knowledgeBaseId, chunkId, content, enable);
}
String jsonBody = JsonUtils.serialize(request);
url = url + "&clientToken=" + clientToken;
ClassicHttpRequest postRequest = httpClient.createPostRequestV2(url,
Expand Down Expand Up @@ -634,8 +652,12 @@ public void deleteChunk(String chunkId, String clientToken) throws IOException,
*/
private void innderDeleteChunk(String chunkId, String clientToken) throws IOException, AppBuilderServerException {
String url = AppBuilderConfig.CHUNK_DELETE_URL;

ChunkDeleteRequest request = new ChunkDeleteRequest();
request.setChunkId(chunkId);
if (!this.knowledgeBaseId.isEmpty()) {
request.setKnowledgeBaseId(this.knowledgeBaseId);
}
String jsonBody = JsonUtils.serialize(request);
url = url + "&clientToken=" + clientToken;
ClassicHttpRequest postRequest = httpClient.createPostRequestV2(url,
Expand All @@ -658,6 +680,9 @@ public ChunkDescribeResponse describeChunk(String chunkId)

ChunkDescribeRequest request = new ChunkDescribeRequest();
request.setChunkId(chunkId);
if (!this.knowledgeBaseId.isEmpty()) {
request.setKnowledgeBaseId(this.knowledgeBaseId);
}
String jsonBody = JsonUtils.serialize(request);
ClassicHttpRequest postRequest = httpClient.createPostRequestV2(url,
new StringEntity(jsonBody, StandardCharsets.UTF_8));
Expand All @@ -683,6 +708,9 @@ public ChunksDescribeResponse describeChunks(String documentId, String marker, I
String url = AppBuilderConfig.CHUNKS_DESCRIBE_URL;

ChunksDescribeRequest request = new ChunksDescribeRequest(documentId, marker, maxKeys, type);
if (!this.knowledgeBaseId.isEmpty()) {
request.setKnowledgeBaseId(this.knowledgeBaseId);
}
String jsonBody = JsonUtils.serialize(request);
ClassicHttpRequest postRequest = httpClient.createPostRequestV2(url,
new StringEntity(jsonBody, StandardCharsets.UTF_8));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,43 @@
package com.baidubce.appbuilder.model.knowledgebase;

/**
 * Request body for creating a chunk. Holds an optional knowledge base ID plus
 * the target document ID and the chunk content.
 */
public class ChunkCreateRequest {
    private String knowledgeBaseId;
    private String documentId;
    private String content;

    /**
     * Builds a request without a knowledge base ID (the field stays null).
     *
     * @param documentId ID of the document the chunk belongs to
     * @param content    chunk content
     */
    public ChunkCreateRequest(String documentId, String content) {
        this(null, documentId, content);
    }

    /**
     * Builds a request that carries a knowledge base ID.
     *
     * @param knowledgeBaseId knowledge base ID
     * @param documentId      ID of the document the chunk belongs to
     * @param content         chunk content
     */
    public ChunkCreateRequest(String knowledgeBaseId, String documentId, String content) {
        this.knowledgeBaseId = knowledgeBaseId;
        this.documentId = documentId;
        this.content = content;
    }

    /** @return the knowledge base ID, or null when none was set */
    public String getKnowledgeBaseId() {
        return knowledgeBaseId;
    }

    /** @param knowledgeBaseId the knowledge base ID to store */
    public void setKnowledgeBaseId(String knowledgeBaseId) {
        this.knowledgeBaseId = knowledgeBaseId;
    }

    /** @return the document ID */
    public String getDocumentId() {
        return documentId;
    }

    /** @param documentId the document ID to store */
    public void setDocumentId(String documentId) {
        this.documentId = documentId;
    }

    /** @return the chunk content */
    public String getContent() {
        return content;
    }

    /** @param content the chunk content to store */
    public void setContent(String content) {
        this.content = content;
    }
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
package com.baidubce.appbuilder.model.knowledgebase;

public class ChunkDeleteRequest {
private String knowledgeBaseId;
private String chunkId;

public void setKnowledgeBaseId(String knowledgeBaseId) {
this.knowledgeBaseId = knowledgeBaseId;
}

public String getKnowledgeBaseId() {
return knowledgeBaseId;
}

public void setChunkId(String chunkId) {
this.chunkId = chunkId;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
package com.baidubce.appbuilder.model.knowledgebase;

/**
 * Request body for fetching a single chunk: a chunk ID plus an optional
 * knowledge base ID. Both fields are null until set.
 */
public class ChunkDescribeRequest {
    private String knowledgeBaseId;
    private String chunkId;

    /** @return the knowledge base ID, or null when none was set */
    public String getKnowledgeBaseId() {
        return this.knowledgeBaseId;
    }

    /** @param value the knowledge base ID to store */
    public void setKnowledgeBaseId(String value) {
        this.knowledgeBaseId = value;
    }

    /** @return the chunk ID, or null when none was set */
    public String getChunkId() {
        return this.chunkId;
    }

    /** @param value the chunk ID to store */
    public void setChunkId(String value) {
        this.chunkId = value;
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.baidubce.appbuilder.model.knowledgebase;

public class ChunkModifyRequest {
private String knowledgeBaseId;
private String chunkId;
private String content;
private boolean enable;
Expand All @@ -11,15 +12,42 @@ public ChunkModifyRequest(String chunkId, String content, boolean enable) {
this.enable = enable;
}

public ChunkModifyRequest(String knowledgeBaseId, String chunkId, String content, boolean enable) {
this.knowledgeBaseId = knowledgeBaseId;
this.chunkId = chunkId;
this.content = content;
this.enable = enable;
}

public String getKnowledgeBaseId() {
return knowledgeBaseId;
}

public void setKnowledgeBaseId(String knowledgeBaseId) {
this.knowledgeBaseId = knowledgeBaseId;
}

public String getChunkId() {
return chunkId;
}

public void setChunkId(String chunkId) {
this.chunkId = chunkId;
}

public String getContent() {
return content;
}

public void setContent(String content) {
this.content = content;
}

public boolean getEnable() {
return enable;
}

public void setEnable(boolean enable) {
this.enable = enable;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.baidubce.appbuilder.model.knowledgebase;

public class ChunksDescribeRequest {
private String knowledgeBaseId;
private String documentId;
private String marker;
private Integer maxKeys;
Expand All @@ -13,19 +14,52 @@ public ChunksDescribeRequest(String documentId, String marker, Integer maxKeys,
this.type = type;
}

public ChunksDescribeRequest(String knowledgeBaseId, String documentId, String marker, Integer maxKeys,
String type) {
this.knowledgeBaseId = knowledgeBaseId;
this.documentId = documentId;
this.marker = marker;
this.maxKeys = maxKeys;
this.type = type;
}

public String getKnowledgeBaseId() {
return knowledgeBaseId;
}

public void setKnowledgeBaseId(String knowledgeBaseId) {
this.knowledgeBaseId = knowledgeBaseId;
}

public String getDocumentId() {
return documentId;
}

public void setDocumentId(String documentId) {
this.documentId = documentId;
}

public String getMarker() {
return marker;
}

public void setMarker(String marker) {
this.marker = marker;
}

public Integer getMaxKeys() {
return maxKeys;
}

public void setMaxKeys(Integer maxKeys) {
this.maxKeys = maxKeys;
}

public String getType() {
return type;
}

public void setType(String type) {
this.type = type;
}
}
7 changes: 6 additions & 1 deletion python/core/console/knowledge_base/data_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class DocumentProcessOption(BaseModel):
)
parser: Optional[DocumentChoices] = Field(
None,
description="解析方法(文字提取默认启动,参数不体现,layoutAnalysis版面分析,ocr按需增加)",
description="解析方法(文字提取默认启动,参数不体现,layoutAnalysis版面分析,ocr光学字符识别,pageImageAnalysis文档图片解析,chartAnalysis图表解析,tableAnalysis表格深度解析,按需增加)",
)
knowledgeAugmentation: Optional[DocumentChoices] = Field(
None,
Expand Down Expand Up @@ -230,6 +230,7 @@ class KnowledgeBaseUploadDocumentsResponse(BaseModel):


# Request model for creating a chunk. knowledgeBaseId is optional (defaults
# to None); documentId and content are required.
class CreateChunkRequest(BaseModel):
    # Annotated Optional because the default is None: with a plain ``str``
    # annotation, passing knowledgeBaseId=None explicitly fails validation
    # under pydantic v2.
    knowledgeBaseId: Optional[str] = Field(None, description="知识库ID")
    documentId: str = Field(..., description="文档ID")
    content: str = Field(..., description="文档内容")

Expand All @@ -239,16 +240,19 @@ class CreateChunkResponse(BaseModel):


# Request model for modifying a chunk. knowledgeBaseId is optional (defaults
# to None); chunkId, content and enable are required.
class ModifyChunkRequest(BaseModel):
    # Annotated Optional because the default is None: with a plain ``str``
    # annotation, passing knowledgeBaseId=None explicitly fails validation
    # under pydantic v2.
    knowledgeBaseId: Optional[str] = Field(None, description="知识库ID")
    chunkId: str = Field(..., description="切片ID")
    content: str = Field(..., description="文档内容")
    enable: bool = Field(..., description="是否启用")


# Request model for deleting a chunk. knowledgeBaseId is optional (defaults
# to None); chunkId is required.
class DeleteChunkRequest(BaseModel):
    # Annotated Optional because the default is None: with a plain ``str``
    # annotation, passing knowledgeBaseId=None explicitly fails validation
    # under pydantic v2.
    knowledgeBaseId: Optional[str] = Field(None, description="知识库ID")
    chunkId: str = Field(..., description="切片ID")


# Request model for fetching a single chunk. knowledgeBaseId is optional
# (defaults to None); chunkId is required.
class DescribeChunkRequest(BaseModel):
    # Annotated Optional because the default is None: with a plain ``str``
    # annotation, passing knowledgeBaseId=None explicitly fails validation
    # under pydantic v2.
    knowledgeBaseId: Optional[str] = Field(None, description="知识库ID")
    chunkId: str = Field(..., description="切片ID")


Expand All @@ -269,6 +273,7 @@ class DescribeChunkResponse(BaseModel):


class DescribeChunksRequest(BaseModel):
knowledgeBaseId: str = Field(None, description="知识库ID")
documentId: str = Field(..., description="文档ID")
marker: Optional[str] = Field(None, description="起始位置")
maxKeys: Optional[int] = Field(
Expand Down
Loading

0 comments on commit bb03e0d

Please sign in to comment.