diff --git a/appbuilder/__init__.py b/appbuilder/__init__.py index 08a37b982..e54f1f2c0 100644 --- a/appbuilder/__init__.py +++ b/appbuilder/__init__.py @@ -63,6 +63,10 @@ def check_version(self): from .core.components.doc_splitter.doc_splitter import DocSplitter from .core.components.retriever.bes.bes_retriever import BESRetriever from .core.components.retriever.bes.bes_retriever import BESVectorStoreIndex +from .core.components.retriever.baidu_vdb.baiduvdb_retriever import BaiduVDBVectorStoreIndex +from .core.components.retriever.baidu_vdb.baiduvdb_retriever import BaiduVDBRetriever +from .core.components.retriever.baidu_vdb.baiduvdb_retriever import TableParams + from .core.components.dish_recognize.component import DishRecognition from .core.components.translate.component import Translation from .core.components.animal_recognize.component import AnimalRecognition @@ -134,6 +138,10 @@ def check_version(self): "DocSplitter", "BESRetriever", "BESVectorStoreIndex", + "BaiduVDBVectorStoreIndex", + "BaiduVDBRetriever", + "TableParams", + 'DishRecognition', 'Translation', 'Message', diff --git a/appbuilder/core/components/retriever/__init__.py b/appbuilder/core/components/retriever/__init__.py index e69de29bb..04a53047f 100644 --- a/appbuilder/core/components/retriever/__init__.py +++ b/appbuilder/core/components/retriever/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023 Baidu, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .bes import BESVectorStoreIndex +from .bes import BESRetriever + +from .baidu_vdb import BaiduVDBVectorStoreIndex +from .baidu_vdb import BaiduVDBRetriever +from .baidu_vdb import TableParams \ No newline at end of file diff --git a/appbuilder/core/components/retriever/baidu_vdb/README.md b/appbuilder/core/components/retriever/baidu_vdb/README.md index f7a1c8b94..49bf74542 100644 --- a/appbuilder/core/components/retriever/baidu_vdb/README.md +++ b/appbuilder/core/components/retriever/baidu_vdb/README.md @@ -19,6 +19,10 @@ 以下是有关如何开始使用BaiduVDBRetriever的代码示例: +补充说明: +- `you_vdb_instance_id` 为VectorDB 实例ID,请替换为您的实例ID,在VectorDB控制台界面上可以查看 +- `your_api_key` 为您在VectorDB上申请的账户密钥,请替换为您自己的root账户密钥,在VectorDB控制台界面上可以查看 + ```python import os import appbuilder @@ -26,7 +30,6 @@ import appbuilder # 请前往千帆AppBuilder官网创建密钥,流程详见:https://cloud.baidu.com/doc/AppBuilder/s/Olq6grrt6#1%E3%80%81%E5%88%9B%E5%BB%BA%E5%AF%86%E9%92%A5 os.environ["APPBUILDER_TOKEN"] = '...' -embedding = appbuilder.Embedding() segments = appbuilder.Message(["文心一言大模型", "百度在线科技有限公司"]) # 初始化构建索引 vector_index = appbuilder.BaiduVDBVectorStoreIndex.from_params( @@ -52,21 +55,41 @@ os.environ["APPBUILDER_TOKEN"] = "bce-YOURTOKEN" ``` ### 初始化参数说明: +`BaiduVDBVectorStoreIndex()` 实例化参数说明: +- instance_id(str,必填):百度向量数据库的实例id,创建实例时获取 +- api_key (str,必填):连接向量数据库所需的密码,创建实例时获取 +- account (str,非必填):连接向量数据库所需的用户名,默认root +- database_name (str,非必填) :向量数据库的名称,默认为AppBuilderDatabase +- table_params (TableParams,非必填) :VectorDB table参数,参考链接[VectorDB table params](https://cloud.baidu.com/doc/VDB/s/mlrsob0p6) +- embedding (Embedding,非必填) :appbuilder.Embedding类型,若有构造好的Embedding,可以增量插入,否则默认新建embedding + +------- -- segments (Message[List[str]],必填):需要入库的文本段落 +`BaiduVDBVectorStoreIndex().from_params()` 构造函数参数说明: - instance_id(str,必填):百度向量数据库的实例id,创建实例时获取 - api_key (str,必填):连接向量数据库所需的密码,创建实例时获取 - account (str,非必填):连接向量数据库所需的用户名,默认root -- embedding (obj,非必填):用于将文本转为向量的模型,默认为Embedding +- database_name (str,非必填) 
:向量数据库的名称,默认为AppBuilderDatabase +- table_name (str,非必填) :向量数据库的表名,默认为AppBuilderTable - drop_exists (bool, 非必填) :是否清空数据库历史记录,默认为False +------- + + ### 调用参数: + +`BaiduVDBRetriever().run()` 函数参数说明: + | 参数名称 | 参数类型 |是否必须 | 描述 | 示例值 | |---------|--------|--------|------------------|---------------| -| message | String |是 | 需要检索的内容 | "中国2023人均GDP" | -| top_k | int |否 | 返回相似度最高的top_k个内容 | 1 | +| query | Message |是 | 需要检索的内容, 类型为Message,content类型为str, 长度要求(0,1000] | "中国2023人均GDP" | +| top_k | int |否 | 返回相似度最高的top_k个内容,top_k的数值范围[1,embedding索引数量] | 1 | + ### 响应参数 + +`BaiduVDBRetriever().run()` 函数返回值说明: + | 参数名称 | 参数类型 | 描述 | 示例值 | |------|--------|-----|--------------------| | text | string | 检索结果 | "中国2023年人均GDP8.94万元" | diff --git a/appbuilder/core/components/retriever/baidu_vdb/__init__.py b/appbuilder/core/components/retriever/baidu_vdb/__init__.py index e69de29bb..f62cda550 100644 --- a/appbuilder/core/components/retriever/baidu_vdb/__init__.py +++ b/appbuilder/core/components/retriever/baidu_vdb/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2023 Baidu, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .baiduvdb_retriever import BaiduVDBVectorStoreIndex +from .baiduvdb_retriever import BaiduVDBRetriever +from .baiduvdb_retriever import TableParams \ No newline at end of file diff --git a/appbuilder/core/components/retriever/baidu_vdb/baiduvdb_retriever.py b/appbuilder/core/components/retriever/baidu_vdb/baiduvdb_retriever.py index 811d9e223..ebded80e0 100644 --- a/appbuilder/core/components/retriever/baidu_vdb/baiduvdb_retriever.py +++ b/appbuilder/core/components/retriever/baidu_vdb/baiduvdb_retriever.py @@ -58,6 +58,7 @@ "Unsupported metric type: `{}`, supported metric types are {}" ) + def _try_import() -> None: try: import pymochow @@ -66,6 +67,7 @@ def _try_import() -> None: "`pymochow` package not found, please run `pip install pymochow`" ) + class TableParams: """Baidu VectorDB table params. See the following documentation for details: @@ -102,6 +104,7 @@ def __init__( self.drop_exists = drop_exists self.vector_params = vector_params + class BaiduVDBVectorStoreIndex: """ Baidu VDB向量存储检索工具 @@ -110,19 +113,43 @@ class BaiduVDBVectorStoreIndex: def __init__( self, - instance_id, + instance_id: str, api_key: str, account: str = DEFAULT_ACCOUNT, database_name: str = DEFAULT_DATABASE_NAME, table_params: TableParams = TableParams(dimension=384), embedding=None, ): + if not isinstance(instance_id, str): + raise TypeError( + "Parameter `instance_id` must be a string, but got {}".format( + type(instance_id))) + if not isinstance(api_key, str): + raise TypeError( + "Parameter `api_key` must be a string, but got {}".format( + type(api_key))) + if not isinstance(account, str): + raise TypeError( + "Parameter `account` must be a string, but got {}".format( + type(account))) + if not isinstance(database_name, str): + raise TypeError( + "Parameter `database_name` must be a string, but got {}".format( + type(database_name))) + if not isinstance(table_params, TableParams): + raise TypeError( + "Parameter `table_params` must be a TableParams, but got {}".format( + 
type(table_params))) + if embedding is not None and not isinstance(embedding, Embedding): + raise TypeError( + "Parameter `embedding` must be a Embedding, but got {}".format( + type(embedding))) if embedding is None: embedding = Embedding() self.embedding = embedding - + self._init_client(instance_id, account, api_key) self._create_database_if_not_exists(database_name) self._create_table(table_params) @@ -135,13 +162,16 @@ def _init_client(self, instance_id, account, api_key): from pymochow.configuration import Configuration from pymochow.auth.bce_credentials import AppBuilderCredentials - gateway = os.getenv("GATEWAY_URL") if os.getenv("GATEWAY_URL") else GATEWAY_URL + gateway = os.getenv("GATEWAY_URL") if os.getenv( + "GATEWAY_URL") else GATEWAY_URL appbuilder_token = os.getenv("APPBUILDER_TOKEN") + uri_prefix = self.vdb_uri_prefix + instance_id.encode('utf-8') config = Configuration( - credentials=AppBuilderCredentials(account, api_key, appbuilder_token), + credentials=AppBuilderCredentials( + account, api_key, appbuilder_token), endpoint=gateway, - uri_perfix=self.vdb_uri_prefix, + uri_prefix=uri_prefix, connection_timeout_in_mills=DEFAULT_TIMEOUT_IN_MILLS, ) self.vdb_client = pymochow.MochowClient(config) @@ -196,7 +226,10 @@ def _create_table_in_db( fields.append(Field(FIELD_TEXT, FieldType.STRING)) fields.append( Field( - FIELD_VECTOR, FieldType.FLOAT_VECTOR, dimension=table_params.dimension + FIELD_VECTOR, + FieldType.FLOAT_VECTOR, + dimension=table_params.dimension, + not_null=True, ) ) @@ -221,7 +254,7 @@ def _create_table_in_db( ) # need wait 10s to wait proxy sync meta time.sleep(10) - + @staticmethod def _get_index_params(index_type: Any, table_params: TableParams) -> None: from pymochow.model.enum import IndexType @@ -248,7 +281,8 @@ def _get_index_type(index_type_value: str) -> Any: try: return IndexType(index_type_value) except ValueError: - support_index_types = [d.value for d in IndexType.__members__.values()] + support_index_types = [ + d.value 
for d in IndexType.__members__.values()] raise ValueError( NOT_SUPPORT_INDEX_TYPE_ERROR.format( index_type_value, support_index_types @@ -263,7 +297,8 @@ def _get_metric_type(metric_type_value: str) -> Any: try: return MetricType(metric_type_value.upper()) except ValueError: - support_metric_types = [d.value for d in MetricType.__members__.values()] + support_metric_types = [ + d.value for d in MetricType.__members__.values()] raise ValueError( NOT_SUPPORT_METRIC_TYPE_ERROR.format( metric_type_value, support_metric_types @@ -297,14 +332,16 @@ def add_segments(self, segments: Message, metadata=""): segment_vectors = segment_vectors.content vector_dims = len(segment_vectors[0]) segments = segments.content - + if len(segments) == 0: + raise ValueError("add_segments函数 参数segment 内容为空") + rows = [] for segment, vector in zip(segments, segment_vectors): row = Row(text=segment, vector=vector, metadata=metadata) rows.append(row) if len(rows) >= DEFAULT_BATCH_SIZE: - self.collection.upsert(rows=rows) - rows = [] + self.collection.upsert(rows=rows) + rows = [] if len(rows) > 0: self.table.upsert(rows=rows) @@ -320,10 +357,47 @@ def from_params( drop_exists: bool = False, **kwargs, ): + """ + 从参数中实例化类。 + + Args: + cls: 类对象,即当前函数所属的类。 + instance_id: str,实例ID。 + api_key: str,API密钥。 + account: str,账户名,默认为root。 + database_name: str,数据库名,默认为AppBuilderDatabase。 + table_name: str,表名,默认为AppBuilderTable。 + drop_exists: bool,是否删除已存在的表,默认为False。 + **kwargs: 其他参数,可选的维度参数dimension默认为384。 + + Returns: + 类实例,包含实例ID、账户名、API密钥、数据库名、表参数等属性。 + + """ _try_import() dimension = kwargs.get("dimension", 384) + + if not isinstance(instance_id, str): + raise TypeError("instance_id must be a string. but got {}".format( + type(instance_id))) + if not isinstance(api_key, str): + raise TypeError("api_key must be a string. but got {}".format( + type(api_key))) + if not isinstance(account, str): + raise TypeError("account must be a string. 
but got {}".format( + type(account))) + if not isinstance(database_name, str): + raise TypeError("database_name must be a string. but got {}".format( + type(database_name))) + if not isinstance(table_name, str): + raise TypeError("table_name must be a string. but got {}".format( + type(table_name))) + if not isinstance(drop_exists, bool): + raise TypeError("drop_exists must be a boolean. but got {}".format( + type(drop_exists))) + table_params = TableParams( - dimension=dimension, + dimension=dimension, table_name=table_name, drop_exists=drop_exists, ) @@ -353,7 +427,7 @@ class BaiduVDBRetriever(Component): self.api_key, ) vector_index.add_segments(segments) - + query = appbuilder.Message("文心一言") time.sleep(5) retriever = vector_index.as_retriever() @@ -361,7 +435,8 @@ class BaiduVDBRetriever(Component): """ name: str = "BaiduVectorDBRetriever" - tool_desc: Dict[str, Any] = {"description": "a retriever based on Baidu VectorDB"} + tool_desc: Dict[str, Any] = { + "description": "a retriever based on Baidu VectorDB"} def __init__(self, embedding, table): super().__init__() @@ -381,13 +456,33 @@ def run(self, query: Message, top_k: int = 1): from pymochow.model.table import AnnSearch, HNSWSearchParams from pymochow.model.enum import ReadConsistency + if not isinstance(query, Message): + raise TypeError("Parameter `query` must be a Message, but got {}" + .format(type(query))) + if not isinstance(top_k, int): + raise TypeError("Parameter `top_k` must be a int, but got {}" + .format(type(top_k))) + if top_k <= 0: + raise ValueError("Parameter `top_k` must be a positive integer, but got {}" + .format(top_k)) + + content = query.content + if not isinstance(content, str): + raise ValueError("Parameter `query` content is not a string, got: {}" + .format(type(content))) + if len(content) == 0: + raise ValueError("Parameter `query` content is empty") + if len(content) > 1000: + raise ValueError("Parameter `query` content is too long, max length per batch size is 1000") + 
query_embedding = self.embedding(query) anns = AnnSearch( vector_field=FIELD_VECTOR, vector_floats=query_embedding.content, params=HNSWSearchParams(ef=10, limit=top_k), ) - res = self.table.search(anns=anns, read_consistency=ReadConsistency.STRONG) + res = self.table.search( + anns=anns, read_consistency=ReadConsistency.STRONG) rows = res.rows docs = [] if rows is None or len(rows) == 0: diff --git a/appbuilder/core/components/retriever/bes/__init__.py b/appbuilder/core/components/retriever/bes/__init__.py index e69de29bb..9c060610c 100644 --- a/appbuilder/core/components/retriever/bes/__init__.py +++ b/appbuilder/core/components/retriever/bes/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2023 Baidu, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .bes_retriever import BESVectorStoreIndex +from .bes_retriever import BESRetriever diff --git a/appbuilder/tests/test_vdb_retriever.py b/appbuilder/tests/test_vdb_retriever.py new file mode 100644 index 000000000..54874c925 --- /dev/null +++ b/appbuilder/tests/test_vdb_retriever.py @@ -0,0 +1,116 @@ +# Copyright (c) 2023 Baidu, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# !/usr/bin/env python3 + + +import os +import unittest +from typing import List, Tuple +import appbuilder + + +class TestBaiduVDBRetrieverParameter(unittest.TestCase): + def setUp(self) -> None: + os.environ["APPBUILDER_TOKEN"] = "bce-v3/ABCDE" + + def test_run_parameter_query(self): + query = appbuilder.Message() + retriever = appbuilder.BaiduVDBRetriever( + embedding="abcde", + table="abcde") + + with self.assertRaises(ValueError) as context: + retriever.run(query) + self.assertIn("Parameter `query` content is empty", str(context.exception)) + + def test_run_paramter_query_type(self): + query = appbuilder.Message(content=12345) + + retriever = appbuilder.BaiduVDBRetriever( + embedding="abcde", + table="abcde") + + with self.assertRaises(ValueError) as context: + retriever.run(query) + self.assertIn("Parameter `query` content is not a string", str(context.exception)) + + def test_run_parameter_query_length(self): + query = appbuilder.Message(content="a" * 1025) + retriever = appbuilder.BaiduVDBRetriever( + embedding="abcde", + table="abcde") + with self.assertRaises(ValueError) as context: + retriever.run(query) + self.assertIn("Parameter `query` content is too long", str(context.exception)) + + def test_run_parameter_topk_positive(self): + query = appbuilder.Message() + retriever = appbuilder.BaiduVDBRetriever( + embedding="abcde", + table="abcde") + with self.assertRaises(TypeError) as context: + retriever.run(query, topk=-1) + self.assertIn("Parameter `top_k` must be a positive integer", str(context.exception)) + + def test_run_parameter_topk(self): + 
query = appbuilder.Message() + retriever = appbuilder.BaiduVDBRetriever( + embedding="abcde", + table="abcde") + with self.assertRaises(TypeError) as context: + retriever.run(query, topk="abc") + self.assertIn("Parameter `top_k` must be a int", str(context.exception)) + + + +class TestVDBParameterCheck(unittest.TestCase): + def setUp(self) -> None: + os.environ["APPBUILDER_TOKEN"] = "bce-v3/ABCDE" + + def test_vdb_parameter_account(self): + with self.assertRaises(TypeError) as context: + appbuilder.BaiduVDBVectorStoreIndex( + instance_id="abcde", + api_key="abcde", + account=123456) + self.assertIn("must be a string", str(context.exception)) + + def test_vdb_parameter_database_name(self): + with self.assertRaises(TypeError) as context: + appbuilder.BaiduVDBVectorStoreIndex( + instance_id="abcde", + api_key="abcde", + database_name=123456) + self.assertIn("must be a string", str(context.exception)) + + + def test_vdb_parameter_table_params(self): + with self.assertRaises(TypeError) as context: + appbuilder.BaiduVDBVectorStoreIndex( + instance_id="abcde", + api_key="abcde", + table_params=123456) + self.assertIn("must be a TableParams", str(context.exception)) + + def test_vdb_parameter_embedding(self): + with self.assertRaises(TypeError) as context: + appbuilder.BaiduVDBVectorStoreIndex( + instance_id="abcde", + api_key="abcde", + embedding=123456) + self.assertIn("must be a Embedding", str(context.exception)) + + +if __name__ == '__main__': + unittest.main() diff --git a/cookbooks/vdb_retriever.ipynb b/cookbooks/vdb_retriever.ipynb new file mode 100644 index 000000000..215d89c1e --- /dev/null +++ b/cookbooks/vdb_retriever.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VectorDB\n", + "\n", + "## 概述\n", + "\n", + "VectorDB产品基于百度自研的“莫愁”向量数据库内核构建,充分利用了百度内部在分布式存储&数据库领域的成熟经验,实现了高可靠、高可用、强扩展和大规模的能力。详细介绍可以参考文档 [VectorDB](https://cloud.baidu.com/doc/VDB/s/Llrsoaz3l)\n", + "\n", + "## 应用场景\n", + 
"\n", + "### 场景一:信息相似度检索\n", + "\n", + "在当今的信息时代,快速而准确地检索文本和图片数据成为了一个重要挑战。我们的向量数据库针对这一挑战,提供了一个高效、安全且智能的解决方案。\n", + "\n", + "### 场景二:大模型问答记忆\n", + "\n", + "在利用大型语言模型进行问答交互时,保持模型与用户之间的会话连贯性是提高回答质量和相关性的关键。我们的解决方案通过实时存储和检索会话数据,有效地降低了幻觉情况的发生,提升了问答的准确性和用户体验。\n", + "\n", + "\n", + "### 场景三:大模型私域知识库\n", + "\n", + "在私有云环境中,构建专属的私域知识库是提升业务决策效率和精确性的关键。我们的向量数据库解决方案充分利用大模型的推理能力,针对私域数据提供定制化、高效的知识管理和检索服务。" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 开发上手\n", + "\n", + "### 第一步:环境准备,安装依赖" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install appbuilder-sdk" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 第二步:平台注册,设置Token\n", + "\n", + "- 2.1、注册AppBuilder平台账户,并申请Token,可参考文档:[AppBuilder官网创建密钥](https://cloud.baidu.com/doc/AppBuilder/s/Olq6grrt6#1%E3%80%81%E5%88%9B%E5%BB%BA%E5%AF%86%E9%92%A5)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# AppBuilder平台注册后获取密钥\n", + "import os\n", + "import appbuilder\n", + "\n", + "# 请前往千帆AppBuilder官网创建密钥,流程详见:https://cloud.baidu.com/doc/AppBuilder/s/Olq6grrt6#1%E3%80%81%E5%88%9B%E5%BB%BA%E5%AF%86%E9%92%A5\n", + "os.environ[\"APPBUILDER_TOKEN\"] = '...'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- 2.2、同时您需要申请并开通VectorDB,并创建实例,具体操作请参考[VectorDB文档](https://cloud.baidu.com/doc/VDB/s/hlrsoazuf)\n", + "\n", + "\n", + "### 第三步:创建并运行VectorDB,初始化构建索引\n", + "\n", + "补充说明:\n", + "- `you_vdb_instance_id` 为VectorDB 实例ID,请替换为您的实例ID,在VectorDB控制台界面上可以查看\n", + "- `your_api_key` 为您在VectorDB上申请的账户密钥,请替换为您自己的root账户密钥,在VectorDB控制台界面上可以查看" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "segments = appbuilder.Message([\"文心一言大模型\", \"百度在线科技有限公司\"])\n", + "# 初始化构建索引\n", + "vector_index = 
appbuilder.BaiduVDBVectorStoreIndex.from_params(\n", + " instance_id=\"your_instance_id\",\n", + " api_key=\"your_api_key\",\n", + " drop_exists=True,\n", + ")\n", + "vector_index.add_segments(segments)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 第四步:构建完毕索引后,可以通过retriever进行检索" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = appbuilder.Message(\"文心一言\")\n", + "retriever = vector_index.as_retriever()\n", + "res = retriever(query)\n", + "print(res)\n", + "\n", + "# Message(name=msg, content=[{'text': '文心一言大模型', 'meta': '', 'score': 1.0}], mtype=list)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "以上,便完成了VDB作为检索工具参与Appbuilder运行的流程使用,更多的VDB使用方式,请参考[VDB官方文档](https://cloud.baidu.com/doc/VDB/index.html) 与 [AppBuilder-retriever-baidu_vdb 文档](../appbuilder/core/components/retriever/baidu_vdb/README.md)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}