Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python: Weaviate vector store #9101

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion python/.coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ omit =
semantic_kernel/connectors/memory/qdrant/qdrant_memory_store.py
semantic_kernel/connectors/memory/redis/redis_memory_store.py
semantic_kernel/connectors/memory/usearch/*
semantic_kernel/connectors/memory/weaviate/*
semantic_kernel/connectors/memory/weaviate/weaviate_memory_store.py
semantic_kernel/reliability/*
semantic_kernel/memory/*

Expand Down
5 changes: 4 additions & 1 deletion python/.cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"endregion",
"genai",
"generativeai",
"hnsw",
"httpx",
"huggingface",
"kernelfunction",
Expand All @@ -58,7 +59,9 @@
"skprompt",
"templating",
"vectordb",
"vectorizer",
"vectorstoremodel",
"vertexai"
"vertexai",
"Weaviate"
]
}
10 changes: 2 additions & 8 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,23 @@ dependencies = [
"pydantic ~= 2.0",
"pydantic-settings ~= 2.0",
"defusedxml ~= 0.7",

# azure identity
"azure-identity ~= 1.13",

# embeddings
"numpy >= 1.25.0; python_version < '3.12'",
"numpy >= 1.26.0; python_version >= '3.12'",

# openai connector
"openai ~= 1.0",

# openapi and swagger
"openapi_core >= 0.18,<0.20",

# OpenTelemetry
"opentelemetry-api ~= 1.24",
"opentelemetry-sdk ~= 1.24",
"prance ~= 23.6.21.0",

# templating
"pybars4 ~= 0.9",
"jinja2 ~= 3.1",
"nest-asyncio ~= 1.6"
"nest-asyncio ~= 1.6",
]

### Optional dependencies
Expand Down Expand Up @@ -109,7 +103,7 @@ usearch = [
"pyarrow >= 12.0,<18.0"
]
weaviate = [
"weaviate-client >= 3.18,<5.0"
"weaviate-client>=4.7,<5.0",
]
pandas = [
"pandas ~= 2.2"
Expand Down
8 changes: 8 additions & 0 deletions python/samples/concepts/memory/new_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from semantic_kernel.connectors.memory.qdrant import QdrantCollection
from semantic_kernel.connectors.memory.redis import RedisHashsetCollection, RedisJsonCollection
from semantic_kernel.connectors.memory.volatile import VolatileCollection
from semantic_kernel.connectors.memory.weaviate.weaviate_collection import WeaviateCollection
from semantic_kernel.data import (
VectorStoreRecordCollection,
VectorStoreRecordDataField,
Expand Down Expand Up @@ -79,6 +80,9 @@ class MyDataModelList:
# - redis_hashset: Redis Hashset
# - qdrant: Qdrant
# - volatile: In-memory store
# - weaviate: Weaviate
# Please either configure the weaviate settings via environment variables or provide them through the constructor.
# Note that embedded mode is not supported on Windows: https://github.com/weaviate/weaviate/issues/3315
#
# This is represented as a mapping from the store name to a
# function which returns the store.
Expand Down Expand Up @@ -109,6 +113,10 @@ class MyDataModelList:
data_model_type=MyDataModel,
collection_name=collection_name,
),
"weaviate": lambda: WeaviateCollection[MyDataModel](
data_model_type=MyDataModel,
collection_name=collection_name,
),
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def data_model_definition_to_azure_ai_search_index(

for field in definition.fields.values():
if isinstance(field, VectorStoreRecordDataField):
assert field.name # nosec
if not field.property_type:
logger.debug(f"Field {field.name} has not specified type, defaulting to Edm.String.")
type_ = TYPE_MAPPER_DATA[field.property_type or "default"]
Expand Down Expand Up @@ -119,6 +120,7 @@ def data_model_definition_to_azure_ai_search_index(
)
)
elif isinstance(field, VectorStoreRecordVectorField):
assert field.name # nosec
if not field.property_type:
logger.debug(f"Field {field.name} has not specified type, defaulting to Collection(Edm.Single).")
if not field.index_kind:
Expand Down
17 changes: 17 additions & 0 deletions python/semantic_kernel/connectors/memory/weaviate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Weaviate Memory Connector

[Weaviate](https://weaviate.io/developers/weaviate) is an open-source vector database. Semantic Kernel provides a connector to allow you to store and retrieve information for your AI applications from a Weaviate database.

## Setup

There are a few ways you can deploy your Weaviate database:
- [Weaviate Cloud](https://weaviate.io/developers/weaviate/installation/weaviate-cloud-services)
- [Docker](https://weaviate.io/developers/weaviate/installation/docker-compose)
- [Embedded](https://weaviate.io/developers/weaviate/installation/embedded)
- Other cloud providers such as [AWS](https://weaviate.io/developers/weaviate/installation/aws-marketplace) or [GCP](https://weaviate.io/developers/weaviate/installation/gc-marketplace).

> Note that embedded mode is not supported on Windows yet: [GitHub issue](https://github.com/weaviate/weaviate/issues/3315)

## Using the Connector

Once the Weaviate database is up and running, and the environment variables are set, you can use the connector in your Semantic Kernel application. Please refer to this sample to see how to use the connector: [Weaviate Connector Sample](../../../../samples/concepts/memory/new_memory.py)
15 changes: 15 additions & 0 deletions python/semantic_kernel/connectors/memory/weaviate/const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Microsoft. All rights reserved.

from weaviate.classes.config import DataType

# Lookup table from a property-type string to the Weaviate `DataType` member
# used for it. Scalar type names map to scalar data types; `list[...]` names
# map to the matching `*_ARRAY` data types.
TYPE_MAPPER_DATA = {
"str": DataType.TEXT,
"int": DataType.INT,
"float": DataType.NUMBER,
"bool": DataType.BOOL,
"list[str]": DataType.TEXT_ARRAY,
"list[int]": DataType.INT_ARRAY,
"list[float]": DataType.NUMBER_ARRAY,
"list[bool]": DataType.BOOL_ARRAY,
# Fallback entry keyed by the literal string "default"; presumably looked up
# when a field declares no property type — confirm against the caller.
"default": DataType.TEXT,
}
Loading
Loading