elastic · maxjakob · Apr 30, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 18, 2024
diff --git a/elasticsearch/helpers/vectorstore/_async/_utils.py b/elasticsearch/helpers/vectorstore/_async/_utils.py
@@ -31,9 +31,9 @@ async def model_must_be_deployed(client: AsyncElasticsearch, model_id: str) -> N
         pass
 
 
-async def model_is_deployed(es_client: AsyncElasticsearch, model_id: str) -> bool:
+async def model_is_deployed(client: AsyncElasticsearch, model_id: str) -> bool:
     try:
-        await model_must_be_deployed(es_client, model_id)
+        await model_must_be_deployed(client, model_id)
         return True
     except NotFoundError:
         return False
diff --git a/elasticsearch/helpers/vectorstore/_async/embedding_service.py b/elasticsearch/helpers/vectorstore/_async/embedding_service.py
@@ -52,7 +52,8 @@ class AsyncElasticsearchEmbeddings(AsyncEmbeddingService):
 
     def __init__(
         self,
-        es_client: AsyncElasticsearch,
+        *,
+        client: AsyncElasticsearch,
         model_id: str,
         input_field: str = "text_field",
         user_agent: str = f"elasticsearch-py-es/{lib_version}",
@@ -63,14 +64,14 @@ def __init__(
         :param model_id: The model_id of the model deployed in the Elasticsearch cluster.
         :param input_field: The name of the key for the input text field in the
             document. Defaults to 'text_field'.
-        :param es_client: Elasticsearch client connection. Alternatively specify the
+        :param client: Elasticsearch client connection. Alternatively specify the
             Elasticsearch connection with the other es_* parameters.
         """
         # Add integration-specific usage header for tracking usage in Elastic Cloud.
         # client.options preserves existing (non-user-agent) headers.
-        es_client = es_client.options(headers={"User-Agent": user_agent})
+        client = client.options(headers={"User-Agent": user_agent})
 
-        self.es_client = es_client
+        self.client = client
         self.model_id = model_id
         self.input_field = input_field
 
@@ -82,7 +83,7 @@ async def embed_query(self, text: str) -> List[float]:
         return result[0]
 
     async def _embedding_func(self, texts: List[str]) -> List[List[float]]:
-        response = await self.es_client.ml.infer_trained_model(
+        response = await self.client.ml.infer_trained_model(
             model_id=self.model_id, docs=[{self.input_field: text} for text in texts]
         )
         return [doc["predicted_value"] for doc in response["inference_results"]]
diff --git a/elasticsearch/helpers/vectorstore/_async/strategies.py b/elasticsearch/helpers/vectorstore/_async/strategies.py
@@ -27,6 +27,7 @@ class AsyncRetrievalStrategy(ABC):
     @abstractmethod
     def es_query(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]],
         text_field: str,
@@ -51,6 +52,7 @@ def es_query(
     @abstractmethod
     def es_mappings_settings(
         self,
+        *,
         text_field: str,
         vector_field: str,
         num_dimensions: Optional[int],
@@ -60,13 +62,15 @@ def es_mappings_settings(
         creating inference pipelines or checking if a required model was deployed.
 
         :param client: Elasticsearch client connection.
-        :param index_name: The name of the Elasticsearch index to create.
-        :param metadata_mapping: Flat dictionary with field and field type pairs that
-            describe the schema of the metadata.
+        :param text_field: The field containing the text data in the index.
+        :param vector_field: The field containing the vector representations in the index.
+        :param num_dimensions: Number of dimensions of the vectors, if vectors are indexed.
+
+        :return: Tuple of (mappings, settings) dictionaries used to create the index.
         """
 
     async def before_index_creation(
-        self, client: AsyncElasticsearch, text_field: str, vector_field: str
+        self, *, client: AsyncElasticsearch, text_field: str, vector_field: str
     ) -> None:
         """
         Executes before the index is created. Used for setting up
@@ -101,6 +105,7 @@ def __init__(self, model_id: str = ".elser_model_2"):
 
     def es_query(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]],
         text_field: str,
@@ -138,6 +143,7 @@ def es_query(
 
     def es_mappings_settings(
         self,
+        *,
         text_field: str,
         vector_field: str,
         num_dimensions: Optional[int],
@@ -154,7 +160,7 @@ def es_mappings_settings(
         return mappings, settings
 
     async def before_index_creation(
-        self, client: AsyncElasticsearch, text_field: str, vector_field: str
+        self, *, client: AsyncElasticsearch, text_field: str, vector_field: str
     ) -> None:
         if self.model_id:
             await model_must_be_deployed(client, self.model_id)
@@ -183,6 +189,7 @@ class AsyncDenseVectorStrategy(AsyncRetrievalStrategy):
 
     def __init__(
         self,
+        *,
         distance: DistanceMetric = DistanceMetric.COSINE,
         model_id: Optional[str] = None,
         hybrid: bool = False,
@@ -202,6 +209,7 @@ def __init__(
 
     def es_query(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]],
         text_field: str,
@@ -236,6 +244,7 @@ def es_query(
 
     def es_mappings_settings(
         self,
+        *,
         text_field: str,
         vector_field: str,
         num_dimensions: Optional[int],
@@ -265,7 +274,7 @@ def es_mappings_settings(
         return mappings, {}
 
     async def before_index_creation(
-        self, client: AsyncElasticsearch, text_field: str, vector_field: str
+        self, *, client: AsyncElasticsearch, text_field: str, vector_field: str
     ) -> None:
         if self.model_id:
             await model_must_be_deployed(client, self.model_id)
@@ -314,6 +323,7 @@ def __init__(self, distance: DistanceMetric = DistanceMetric.COSINE) -> None:
 
     def es_query(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]],
         text_field: str,
@@ -365,6 +375,7 @@ def es_query(
 
     def es_mappings_settings(
         self,
+        *,
         text_field: str,
         vector_field: str,
         num_dimensions: Optional[int],
@@ -396,6 +407,7 @@ def __init__(
 
     def es_query(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]],
         text_field: str,
@@ -423,6 +435,7 @@ def es_query(
 
     def es_mappings_settings(
         self,
+        *,
         text_field: str,
         vector_field: str,
         num_dimensions: Optional[int],

diff --git a/elasticsearch/helpers/vectorstore/_async/vectorstore.py b/elasticsearch/helpers/vectorstore/_async/vectorstore.py
@@ -50,8 +50,9 @@ class AsyncVectorStore:
 
     def __init__(
         self,
-        es_client: AsyncElasticsearch,
-        index_name: str,
+        client: AsyncElasticsearch,
+        *,
+        index: str,
         retrieval_strategy: AsyncRetrievalStrategy,
         embedding_service: Optional[AsyncEmbeddingService] = None,
         num_dimensions: Optional[int] = None,
@@ -63,26 +64,26 @@ def __init__(
         """
         :param user_header: user agent header specific to the 3rd party integration.
             Used for usage tracking in Elastic Cloud.
-        :param index_name: The name of the index to query.
+        :param index: The name of the index to query.
         :param retrieval_strategy: how to index and search the data. See the strategies
             module for availble strategies.
         :param text_field: Name of the field with the textual data.
         :param vector_field: For strategies that perform embedding inference in Python,
             the embedding vector goes in this field.
-        :param es_client: Elasticsearch client connection. Alternatively specify the
+        :param client: Elasticsearch client connection. Alternatively specify the
             Elasticsearch connection with the other es_* parameters.
         """
         # Add integration-specific usage header for tracking usage in Elastic Cloud.
         # client.options preserves existing (non-user-agent) headers.
-        es_client = es_client.options(headers={"User-Agent": user_agent})
+        client = client.options(headers={"User-Agent": user_agent})
 
         if hasattr(retrieval_strategy, "text_field"):
             retrieval_strategy.text_field = text_field
         if hasattr(retrieval_strategy, "vector_field"):
             retrieval_strategy.vector_field = vector_field
 
-        self.es_client = es_client
-        self.index_name = index_name
+        self.client = client
+        self.index = index
         self.retrieval_strategy = retrieval_strategy
         self.embedding_service = embedding_service
         self.num_dimensions = num_dimensions
@@ -91,11 +92,12 @@ def __init__(
         self.metadata_mappings = metadata_mappings
 
     async def close(self) -> None:
-        return await self.es_client.close()
+        return await self.client.close()
 
     async def add_texts(
         self,
         texts: List[str],
+        *,
         metadatas: Optional[List[Dict[str, Any]]] = None,
         vectors: Optional[List[List[float]]] = None,
         ids: Optional[List[str]] = None,
@@ -136,7 +138,7 @@ async def add_texts(
 
             request: Dict[str, Any] = {
                 "_op_type": "index",
-                "_index": self.index_name,
+                "_index": self.index,
                 self.text_field: text,
                 "metadata": metadata,
                 "_id": ids[i],
@@ -150,7 +152,7 @@ async def add_texts(
         if len(requests) > 0:
             try:
                 success, failed = await async_bulk(
-                    self.es_client,
+                    self.client,
                     requests,
                     stats_only=True,
                     refresh=refresh_indices,
@@ -170,6 +172,7 @@ async def add_texts(
 
     async def delete(  # type: ignore[no-untyped-def]
         self,
+        *,
         ids: Optional[List[str]] = None,
         query: Optional[Dict[str, Any]] = None,
         refresh_indices: bool = True,
@@ -191,11 +194,11 @@ async def delete(  # type: ignore[no-untyped-def]
         try:
             if ids:
                 body = [
-                    {"_op_type": "delete", "_index": self.index_name, "_id": _id}
+                    {"_op_type": "delete", "_index": self.index, "_id": _id}
                     for _id in ids
                 ]
                 await async_bulk(
-                    self.es_client,
+                    self.client,
                     body,
                     refresh=refresh_indices,
                     ignore_status=404,
@@ -204,8 +207,8 @@ async def delete(  # type: ignore[no-untyped-def]
                 logger.debug(f"Deleted {len(body)} texts from index")
 
             else:
-                await self.es_client.delete_by_query(
-                    index=self.index_name,
+                await self.client.delete_by_query(
+                    index=self.index,
                     query=query,
                     refresh=refresh_indices,
                     **delete_kwargs,
@@ -221,6 +224,7 @@ async def delete(  # type: ignore[no-untyped-def]
 
     async def search(
         self,
+        *,
         query: Optional[str],
         query_vector: Optional[List[float]] = None,
         k: int = 4,
@@ -270,8 +274,8 @@ async def search(
             query_body = custom_query(query_body, query)
             logger.debug(f"Calling custom_query, Query body now: {query_body}")
 
-        response = await self.es_client.search(
-            index=self.index_name,
+        response = await self.client.search(
+            index=self.index,
             **query_body,
             size=k,
             source=True,
@@ -282,9 +286,9 @@ async def search(
         return hits
 
     async def _create_index_if_not_exists(self) -> None:
-        exists = await self.es_client.indices.exists(index=self.index_name)
+        exists = await self.client.indices.exists(index=self.index)
         if exists.meta.status == 200:
-            logger.debug(f"Index {self.index_name} already exists. Skipping creation.")
+            logger.debug(f"Index {self.index} already exists. Skipping creation.")
             return
 
         if self.retrieval_strategy.needs_inference():
@@ -312,14 +316,17 @@ async def _create_index_if_not_exists(self) -> None:
             mappings["properties"]["metadata"] = {"properties": metadata}
 
         await self.retrieval_strategy.before_index_creation(
-            self.es_client, self.text_field, self.vector_field
+            client=self.client,
+            text_field=self.text_field,
+            vector_field=self.vector_field,
         )
-        await self.es_client.indices.create(
-            index=self.index_name, mappings=mappings, settings=settings
+        await self.client.indices.create(
+            index=self.index, mappings=mappings, settings=settings
         )
 
     async def max_marginal_relevance_search(
         self,
+        *,
         embedding_service: AsyncEmbeddingService,
         query: str,
         vector_field: str,

diff --git a/elasticsearch/helpers/vectorstore/_sync/_utils.py b/elasticsearch/helpers/vectorstore/_sync/_utils.py
@@ -31,9 +31,9 @@ def model_must_be_deployed(client: Elasticsearch, model_id: str) -> None:
         pass
 
 
-def model_is_deployed(es_client: Elasticsearch, model_id: str) -> bool:
+def model_is_deployed(client: Elasticsearch, model_id: str) -> bool:
     try:
-        model_must_be_deployed(es_client, model_id)
+        model_must_be_deployed(client, model_id)
         return True
     except NotFoundError:
         return False
diff --git a/elasticsearch/helpers/vectorstore/_sync/embedding_service.py b/elasticsearch/helpers/vectorstore/_sync/embedding_service.py
@@ -52,7 +52,8 @@ class ElasticsearchEmbeddings(EmbeddingService):
 
     def __init__(
         self,
-        es_client: Elasticsearch,
+        *,
+        client: Elasticsearch,
         model_id: str,
         input_field: str = "text_field",
         user_agent: str = f"elasticsearch-py-es/{lib_version}",
@@ -63,14 +64,14 @@ def __init__(
         :param model_id: The model_id of the model deployed in the Elasticsearch cluster.
         :param input_field: The name of the key for the input text field in the
             document. Defaults to 'text_field'.
-        :param es_client: Elasticsearch client connection. Alternatively specify the
+        :param client: Elasticsearch client connection. Alternatively specify the
             Elasticsearch connection with the other es_* parameters.
         """
         # Add integration-specific usage header for tracking usage in Elastic Cloud.
         # client.options preserves existing (non-user-agent) headers.
-        es_client = es_client.options(headers={"User-Agent": user_agent})
+        client = client.options(headers={"User-Agent": user_agent})
 
-        self.es_client = es_client
+        self.client = client
         self.model_id = model_id
         self.input_field = input_field
 
@@ -82,7 +83,7 @@ def embed_query(self, text: str) -> List[float]:
         return result[0]
 
     def _embedding_func(self, texts: List[str]) -> List[List[float]]:
-        response = self.es_client.ml.infer_trained_model(
+        response = self.client.ml.infer_trained_model(
             model_id=self.model_id, docs=[{self.input_field: text} for text in texts]
         )
         return [doc["predicted_value"] for doc in response["inference_results"]]