feat: Enhance ai-cache Plugin with Vector Similarity-Based LLM Cache …

…Recall and Multi-DB Support (#1248)
alibaba · Nov 21, 2024 · c2d405b · c2d405b
1 parent 6efb310
commit c2d405b
Show file tree

Hide file tree

Showing 9 changed files with 1,275 additions and 37 deletions.
diff --git a/plugins/wasm-go/extensions/ai-cache/README.md b/plugins/wasm-go/extensions/ai-cache/README.md
@@ -60,7 +60,7 @@ LLM 结果缓存插件，默认配置方式可以直接用于 openai 协议的
 | vector.apiKey | string | optional | ""  | 向量存储服务 API Key |
 | vector.topK | int | optional | 1 | 返回TopK结果，默认为 1 |
 | vector.timeout | uint32 | optional | 10000 | 请求向量存储服务的超时时间，单位为毫秒。默认值是10000，即10秒 |
-| vector.collectionID | string | optional | "" |  dashvector 向量存储服务 Collection ID |
+| vector.collectionID | string | optional | "" | 向量存储服务 Collection ID |
 | vector.threshold | float64 | optional | 1000 | 向量相似度度量阈值 |
 | vector.thresholdRelation | string | optional | lt | 相似度度量方式有 `Cosine`, `DotProduct`, `Euclidean` 等，前两者值越大相似度越高，后者值越小相似度越高。对于 `Cosine` 和 `DotProduct` 选择 `gt`，对于 `Euclidean` 则选择 `lt`。默认为 `lt`，所有条件包括 `lt` (less than，小于)、`lte` (less than or equal to，小于等于)、`gt` (greater than，大于)、`gte` (greater than or equal to，大于等于) |
 
@@ -99,6 +99,45 @@ LLM 结果缓存插件，默认配置方式可以直接用于 openai 协议的
 | responseTemplate | string | optional | `{"id":"ai-cache.hit","choices":[{"index":0,"message":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | 返回 HTTP 响应的模版，用 %s 标记需要被 cache value 替换的部分 |
 | streamResponseTemplate | string | optional | `data:{"id":"ai-cache.hit","choices":[{"index":0,"delta":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | 返回流式 HTTP 响应的模版，用 %s 标记需要被 cache value 替换的部分 |
 
+# 向量数据库提供商特有配置
+## Chroma
+Chroma 所对应的 `vector.type` 为 `chroma`。它并无特有的配置字段。需要提前创建 Collection，并填写 Collection ID 至配置项 `vector.collectionID`，一个 Collection ID 的示例为 `52bbb8b3-724c-477b-a4ce-d5b578214612`。
+
+## DashVector
+DashVector 所对应的 `vector.type` 为 `dashvector`。它并无特有的配置字段。需要提前创建 Collection，并填写 `Collection 名称` 至配置项 `vector.collectionID`。
+
+## ElasticSearch
+ElasticSearch 所对应的 `vector.type` 为 `elasticsearch`。需要提前创建 Index 并填写 Index Name 至配置项 `vector.collectionID` 。
+
+当前依赖于 [KNN](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html) 方法，请保证 ES 版本支持 `KNN`，当前已在 `8.16` 版本测试。
+
+它特有的配置字段如下：
+| 名称              | 数据类型 | 填写要求 | 默认值 | 描述                                                                          |
+|-------------------|----------|----------|--------|-------------------------------------------------------------------------------|
+| `vector.esUsername` | string   | 非必填   | -      | ElasticSearch 用户名 |
+| `vector.esPassword` | string | 非必填 | - | ElasticSearch 密码 |
+
+
+`vector.esUsername` 和 `vector.esPassword` 用于 Basic 认证。同时也支持 API Key 认证：当填写了 `vector.apiKey` 时，则启用 API Key 认证；如果使用 SaaS 版本，`vector.apiKey` 需要填写 API Key 的 `encoded` 值。
+
+## Milvus
+Milvus 所对应的 `vector.type` 为 `milvus`。它并无特有的配置字段。需要提前创建 Collection，并填写 Collection Name 至配置项 `vector.collectionID`。
+
+## Pinecone
+Pinecone 所对应的 `vector.type` 为 `pinecone`。它并无特有的配置字段。需要提前创建 Index，并填写 Index 访问域名至 `vector.serviceHost`。
+
+Pinecone 中的 `Namespace` 参数通过插件的 `vector.collectionID` 进行配置，如果不填写 `vector.collectionID`，则默认为 Default Namespace。
+
+## Qdrant
+Qdrant 所对应的 `vector.type` 为 `qdrant`。它并无特有的配置字段。需要提前创建 Collection，并填写 Collection Name 至配置项 `vector.collectionID`。
+
+## Weaviate
+Weaviate 所对应的 `vector.type` 为 `weaviate`。它并无特有的配置字段。
+需要提前创建 Collection，并填写 Collection Name 至配置项 `vector.collectionID`。
+
+需要注意的是 Weaviate 会自动将 Collection 名称的首字母大写，因此在填写配置项 `vector.collectionID` 时需要将首字母大写。
+
+如果使用 SaaS 需要填写 `vector.serviceHost` 参数。
 
 ## 配置示例
 ### 基础配置
@@ -144,4 +183,4 @@ GJSON PATH 支持条件判断语法，例如希望取最后一个 role 为 user
 
 ## 常见问题
 
-1. 如果返回的错误为 `error status returned by host: bad argument`，请检查`serviceName`是否正确包含了服务的类型后缀(.dns等)。
+1. 如果返回的错误为 `error status returned by host: bad argument`，请检查`serviceName`是否正确包含了服务的类型后缀(.dns等)。
diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/weaviate.go b/plugins/wasm-go/extensions/ai-cache/embedding/weaviate.go
diff --git a/plugins/wasm-go/extensions/ai-cache/vector/chroma.go b/plugins/wasm-go/extensions/ai-cache/vector/chroma.go
@@ -0,0 +1,201 @@
+package vector
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+)
+
+// chromaProviderInitializer is the stateless factory for the Chroma backend:
+// its methods validate a ProviderConfig and build a ChromaProvider from it.
+type chromaProviderInitializer struct{}
+
+// ValidateConfig reports an error when a setting required by the Chroma
+// backend is absent. The collection ID is checked first, then the service
+// name; nil is returned when both are present.
+func (c *chromaProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	switch {
+	case len(config.collectionID) == 0:
+		return errors.New("[Chroma] collectionID is required")
+	case len(config.serviceName) == 0:
+		return errors.New("[Chroma] serviceName is required")
+	}
+	return nil
+}
+
+// CreateProvider builds a ChromaProvider whose HTTP client targets the FQDN
+// cluster described by config (service name, host and port). It never returns
+// a non-nil error.
+func (c *chromaProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	cluster := wrapper.FQDNCluster{
+		FQDN: config.serviceName,
+		Host: config.serviceHost,
+		Port: int64(config.servicePort),
+	}
+	provider := &ChromaProvider{
+		config: config,
+		client: wrapper.NewClusterClient(cluster),
+	}
+	return provider, nil
+}
+
+// ChromaProvider talks to a Chroma vector store over HTTP to query and upload
+// embeddings.
+type ChromaProvider struct {
+	// config holds the provider settings; the methods read collectionID, topK
+	// and timeout from it.
+	config ProviderConfig
+	// client issues the POST requests to the Chroma service.
+	client wrapper.HttpClient
+}
+
+// GetProviderType returns the provider type identifier for Chroma.
+func (d *ChromaProvider) GetProviderType() string {
+	return PROVIDER_TYPE_CHROMA
+}
+
+// QueryEmbedding asks Chroma for the entries of the configured collection
+// that are closest to emb and hands the parsed matches to callback.
+//
+// The request is a POST to /api/v1/collections/<collectionID>/query limited to
+// config.topK results, and asks Chroma to include distances and documents.
+//
+// The returned error covers only building and dispatching the request
+// (JSON marshalling or the client's Post call). Everything that happens after
+// the response arrives is reported through callback: a non-2xx status or an
+// unparsable body yields a nil result slice and a non-nil err.
+func (d *ChromaProvider) QueryEmbedding(
+	emb []float64,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(results []QueryResult, ctx wrapper.HttpContext, log wrapper.Log, err error)) error {
+	// Example request body:
+	// {
+	// 	"where": {},          // optional, metadata filter
+	// 	"where_document": {}, // optional, document filter
+	// 	"query_embeddings": [
+	// 	  [1.1, 2.3, 3.2]
+	// 	],
+	// 	"limit": 5,
+	// 	"include": [
+	// 	  "metadatas", // optional
+	// 	  "documents", // required when the cached answer is needed
+	// 	  "distances"
+	// 	]
+	// }
+	requestBody, err := json.Marshal(chromaQueryRequest{
+		QueryEmbeddings: []chromaEmbedding{emb},
+		Limit:           d.config.topK,
+		Include:         []string{"distances", "documents"},
+	})
+	if err != nil {
+		log.Errorf("[Chroma] Failed to marshal query embedding request body: %v", err)
+		return err
+	}
+
+	return d.client.Post(
+		fmt.Sprintf("/api/v1/collections/%s/query", d.config.collectionID),
+		[][2]string{
+			{"Content-Type", "application/json"},
+		},
+		requestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			log.Debugf("[Chroma] Query embedding response: %d, %s", statusCode, responseBody)
+			// An error payload is valid JSON but carries no ids; reject it here
+			// instead of handing it to the parser as if it were a result set.
+			if statusCode < http.StatusOK || statusCode >= http.StatusMultipleChoices {
+				callback(nil, ctx, log,
+					fmt.Errorf("[Chroma] query request failed with status %d: %s", statusCode, responseBody))
+				return
+			}
+			results, parseErr := d.parseQueryResponse(responseBody, log)
+			if parseErr != nil {
+				parseErr = fmt.Errorf("[Chroma] Failed to parse query response: %w", parseErr)
+			}
+			callback(results, ctx, log, parseErr)
+		},
+		d.config.timeout,
+	)
+}
+
+func (d *ChromaProvider) UploadAnswerAndEmbedding(
+	queryString string,
+	queryEmb []float64,
+	queryAnswer string,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(ctx wrapper.HttpContext, log wrapper.Log, err error)) error {
+	// 最少需要填写的参数为 collection_id, embeddings 和 ids
+	// 下面是一个例子
+	// {
+	// 	"embeddings": [
+	// 		  [1.1, 2.3, 3.2]
+	// 	],
+	// 	"ids": [
+	// 	  "你吃了吗？"
+	// 	],
+	//  "documents": [
+	//    "我吃了。"
+	//  ]
+	// }
+	// 如果要添加 answer，则按照以下例子
+	// {
+	// 	"embeddings": [
+	// 	  [1.1, 2.3, 3.2]
+	// 	],
+	// 	"documents": [
+	// 	  "answer1"
+	// 	],
+	// 	"ids": [
+	// 	  "id1"
+	// 	]
+	// }
+	requestBody, err := json.Marshal(chromaInsertRequest{
+		Embeddings: []chromaEmbedding{queryEmb},
+		IDs:        []string{queryString}, // queryString 指的是用户查询的问题
+		Documents:  []string{queryAnswer}, // queryAnswer 指的是用户查询的问题的答案
+	})
+
+	if err != nil {
+		log.Errorf("[Chroma] Failed to marshal upload embedding request body: %v", err)
+		return err
+	}
+
+	err = d.client.Post(
+		fmt.Sprintf("/api/v1/collections/%s/add", d.config.collectionID),
+		[][2]string{
+			{"Content-Type", "application/json"},
+		},
+		requestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			log.Debugf("[Chroma] statusCode:%d, responseBody:%s", statusCode, string(responseBody))
+			callback(ctx, log, err)
+		},
+		d.config.timeout,
+	)
+	return err
+}
+
+// chromaEmbedding is a single embedding vector.
+type chromaEmbedding []float64
+
+// chromaMetadataMap is the string-to-string metadata attached to one record.
+type chromaMetadataMap map[string]string
+
+// chromaInsertRequest is the JSON body sent to the collection "add" endpoint.
+// The slices are parallel: index i of each describes the same record.
+type chromaInsertRequest struct {
+	Embeddings []chromaEmbedding   `json:"embeddings"`
+	Metadatas  []chromaMetadataMap `json:"metadatas,omitempty"` // optional
+	Documents  []string            `json:"documents,omitempty"` // optional
+	IDs        []string            `json:"ids"`
+}
+
+// chromaQueryRequest is the JSON body sent to the collection "query" endpoint.
+type chromaQueryRequest struct {
+	Where           map[string]string `json:"where,omitempty"`          // optional
+	WhereDocument   map[string]string `json:"where_document,omitempty"` // optional
+	QueryEmbeddings []chromaEmbedding `json:"query_embeddings"`         // one embedding per query in the batch
+	Limit           int               `json:"limit"`                    // filled from config.topK by QueryEmbedding
+	Include         []string          `json:"include"`                  // response sections to return, e.g. "distances", "documents"
+}
+
+// chromaQueryResponse is the JSON body returned by the collection "query"
+// endpoint. Only Ids, Distances and Documents are read by parseQueryResponse.
+//
+// NOTE(review): Metadatas and Embeddings are declared one level shallower than
+// Ids/Distances/Documents; confirm against the Chroma API whether they are
+// also nested per query before adding them to the request's "include" list.
+type chromaQueryResponse struct {
+	Ids        [][]string          `json:"ids"`                  // first dimension: one entry per query in the batch; second: the ids matched by that query
+	Distances  [][]float64         `json:"distances,omitempty"`  // parallel to Ids
+	Metadatas  []chromaMetadataMap `json:"metadatas,omitempty"`  // optional
+	Embeddings []chromaEmbedding   `json:"embeddings,omitempty"` // optional
+	Documents  [][]string          `json:"documents,omitempty"`  // parallel to Ids
+	Uris       []string            `json:"uris,omitempty"`       // optional
+	Data       []interface{}       `json:"data,omitempty"`       // optional
+	Included   []string            `json:"included"`
+}
+
+// parseQueryResponse decodes a Chroma query response and converts the matches
+// of the first (and only) query in the batch into QueryResult values: the
+// record id becomes Text, the distance becomes Score and the document becomes
+// Answer.
+//
+// It returns an error when the body is not valid JSON, when the response holds
+// no matches, or when the distances/documents sections are missing or shorter
+// than the id list, so a malformed or error payload can never cause an
+// out-of-range panic.
+func (d *ChromaProvider) parseQueryResponse(responseBody []byte, log wrapper.Log) ([]QueryResult, error) {
+	var queryResp chromaQueryResponse
+	if err := json.Unmarshal(responseBody, &queryResp); err != nil {
+		return nil, err
+	}
+
+	log.Debugf("[Chroma] queryResp Ids len: %d", len(queryResp.Ids))
+	// Guard the outer dimension too: an error payload decodes to zero ids.
+	if len(queryResp.Ids) == 0 || len(queryResp.Ids[0]) == 0 {
+		return nil, errors.New("no query results found in response")
+	}
+	ids := queryResp.Ids[0]
+	if len(queryResp.Distances) == 0 || len(queryResp.Distances[0]) < len(ids) {
+		return nil, errors.New("distances missing or incomplete in response")
+	}
+	if len(queryResp.Documents) == 0 || len(queryResp.Documents[0]) < len(ids) {
+		return nil, errors.New("documents missing or incomplete in response")
+	}
+	distances := queryResp.Distances[0]
+	documents := queryResp.Documents[0]
+
+	results := make([]QueryResult, 0, len(ids))
+	for i, id := range ids {
+		results = append(results, QueryResult{
+			Text:   id,
+			Score:  distances[i],
+			Answer: documents[i],
+		})
+	}
+	return results, nil
+}