Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement vector search experimental feature v2 (v1.6) #924

Merged
merged 14 commits into from
Feb 8, 2024
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ index.search(

This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.

⚠️ This package is not compatible with the [`vectorStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901).

## 💡 Learn more

Expand Down
1 change: 1 addition & 0 deletions meilisearch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Paths:
separator_tokens = "separator-tokens"
non_separator_tokens = "non-separator-tokens"
swap = "swap-indexes"
embedders = "embedders"

def __init__(
self,
Expand Down
67 changes: 66 additions & 1 deletion meilisearch/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from meilisearch.config import Config
from meilisearch.errors import version_error_hint_message
from meilisearch.models.document import Document, DocumentsResults
from meilisearch.models.index import Faceting, IndexStats, Pagination, TypoTolerance
from meilisearch.models.index import Embedders, Faceting, IndexStats, Pagination, TypoTolerance
from meilisearch.models.task import Task, TaskInfo, TaskResults
from meilisearch.task import TaskHandler

Expand Down Expand Up @@ -1757,6 +1757,71 @@ def reset_non_separator_tokens(self) -> TaskInfo:

return TaskInfo(**task)

# EMBEDDERS SUB-ROUTES

def get_embedders(self) -> Embedders | None:
    """Get embedders of the index.

    Returns
    -------
    settings:
        The embedders settings of the index wrapped in an `Embedders` model, or `None` when
        the settings route returns an empty (falsy) response.

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    payload = self.http.get(embedders_url)

    # An empty/falsy payload is surfaced as None rather than as an empty model.
    return Embedders(embedders=payload) if payload else None

def update_embedders(self, body: Union[Mapping[str, Any], None]) -> TaskInfo:
    """Update embedders of the index.

    Parameters
    ----------
    body: dict
        Dictionary containing the embedders, e.g.
        ``{"default": {"source": "userProvided", "dimensions": 2}}``.
        The value is forwarded unchanged as the body of the PATCH request.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.patch(embedders_url, body)

    return TaskInfo(**enqueued)

def reset_embedders(self) -> TaskInfo:
    """Reset embedders of the index to default values.

    Issues a DELETE request on the embedders settings route of this index.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.delete(embedders_url)

    return TaskInfo(**enqueued)

@staticmethod
def _batch(
documents: Sequence[Mapping[str, Any]], batch_size: int
Expand Down
25 changes: 24 additions & 1 deletion meilisearch/models/index.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, Iterator, List, Optional
from typing import Any, Dict, Iterator, List, Optional, Union

from camel_converter import to_snake
from camel_converter.pydantic_base import CamelBase
Expand Down Expand Up @@ -46,3 +46,26 @@ class TypoTolerance(CamelBase):
disable_on_attributes: Optional[List[str]] = None
disable_on_words: Optional[List[str]] = None
min_word_size_for_typos: Optional[MinWordSizeForTypos] = None


class OpenAiEmbedder(CamelBase):
    # Settings model for an embedder whose `source` is "openAi".
    source: str = "openAi"
    # Optional; defaults to text-embedding-ada-002 when left unset.
    model: Optional[str] = None
    # Optional; can be provided through a CLI option or environment variable instead.
    api_key: Optional[str] = None
    document_template: Optional[str] = None


class HuggingFaceEmbedder(CamelBase):
    # Settings model for an embedder whose `source` is "huggingFace".
    source: str = "huggingFace"
    # Optional; defaults to BAAI/bge-base-en-v1.5 when left unset.
    model: Optional[str] = None
    revision: Optional[str] = None
    document_template: Optional[str] = None


class UserProvidedEmbedder(CamelBase):
    # Settings model for an embedder whose `source` is "userProvided".
    source: str = "userProvided"
    # Required: the only field of this model without a default value.
    dimensions: int


class Embedders(CamelBase):
    # Maps each embedder name (e.g. "default") to its settings model.
    # NOTE(review): `source` is a plain `str` on every member of this union, so nothing in
    # the models themselves ties a payload's `source` value to one specific class — confirm
    # that union resolution selects the intended model for each source.
    embedders: Dict[str, Union[OpenAiEmbedder, HuggingFaceEmbedder, UserProvidedEmbedder]]
25 changes: 22 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,21 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies):

@fixture(scope="function")
def index_with_documents_and_vectors(empty_index, small_movies):
small_movies[0]["_vectors"] = [0.1, 0.2]
small_movies[0]["_vectors"] = {"default": [0.1, 0.2]}

def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
index = empty_index(index_uid)
task = index.add_documents(documents)
index.wait_for_task(task.task_uid)
settings_update_task = index.update_embedders(
{
"default": {
"source": "userProvided",
"dimensions": 2,
}
}
)
index.wait_for_task(settings_update_task.task_uid)
document_addition_task = index.add_documents(documents)
index.wait_for_task(document_addition_task.task_uid)
return index

return index_maker
Expand Down Expand Up @@ -216,3 +225,13 @@ def enable_vector_search():
json={"vectorStore": False},
timeout=10,
)


@fixture
def new_embedders():
    """Provide an embedders payload with a single user-provided, one-dimensional embedder."""
    user_provided = {"source": "userProvided", "dimensions": 1}
    return {"default": user_provided}
6 changes: 1 addition & 5 deletions tests/index/test_index_search_meilisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,9 @@ def test_attributes_to_search_on_search_no_match(index_with_documents):
assert response["hits"] == []


@pytest.mark.xfail(
strict=True, reason="https://github.com/meilisearch/meilisearch-python/issues/901"
)
@pytest.mark.usefixtures("enable_vector_search")
def test_vector_search(index_with_documents_and_vectors):
response = index_with_documents_and_vectors().search(
"How to Train Your Dragon", opt_params={"vector": [0.1, 0.2]}
"", opt_params={"vector": [0.1, 0.2], "hybrid": {"semanticRatio": 1.0}}
)
assert response["hits"][0]["id"] == "287947"
assert response["vector"] == [0.1, 0.2]
44 changes: 44 additions & 0 deletions tests/settings/test_settings_embedders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest

from meilisearch.models.index import Embedders


@pytest.mark.usefixtures("enable_vector_search")
def test_get_default_embedders(empty_index):
    """Tests that a fresh index reports no embedders."""
    index = empty_index()

    assert index.get_embedders() is None


@pytest.mark.usefixtures("enable_vector_search")
def test_update_embedders_with_user_provided_source(new_embedders, empty_index):
    """Tests that updated embedders are returned by a subsequent get."""
    index = empty_index()

    task_info = index.update_embedders(new_embedders)
    finished_task = index.wait_for_task(task_info.task_uid)
    current_embedders = index.get_embedders()

    assert finished_task.status == "succeeded"
    assert current_embedders == Embedders(embedders=new_embedders)


@pytest.mark.usefixtures("enable_vector_search")
def test_reset_embedders(new_embedders, empty_index):
    """Tests resetting the embedders setting to its default value."""
    index = empty_index()

    # Update the setting so there is something to reset
    response_update = index.update_embedders(new_embedders)
    update1 = index.wait_for_task(response_update.task_uid)
    # Get the setting after update
    response_get = index.get_embedders()
    # Reset the setting
    response_reset = index.reset_embedders()
    update2 = index.wait_for_task(response_reset.task_uid)
    # Get the setting after reset
    response_last = index.get_embedders()

    assert update1.status == "succeeded"
    assert response_get == Embedders(embedders=new_embedders)
    assert update2.status == "succeeded"
    # After a reset the getter reports no embedders
    assert response_last is None