From e6d0e246648114c4efcb98db3cfd3701c7095b8a Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Tue, 2 Jul 2024 15:41:22 -0700 Subject: [PATCH] small updates to vectorize docs --- .../components/embeddings/AstraVectorize.py | 27 +++++++------- .../components/vectorstores/AstraDB.py | 36 ++++++++++--------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/backend/base/langflow/components/embeddings/AstraVectorize.py b/src/backend/base/langflow/components/embeddings/AstraVectorize.py index cd618f34659..0986dd9a378 100644 --- a/src/backend/base/langflow/components/embeddings/AstraVectorize.py +++ b/src/backend/base/langflow/components/embeddings/AstraVectorize.py @@ -50,37 +50,38 @@ class AstraVectorize(Component): inputs = [ DropdownInput( name="provider", - display_name="Provider name", + display_name="Provider", options=VECTORIZE_PROVIDERS_MAPPING.keys(), value="", + required=True, ), MessageTextInput( name="model_name", - display_name="Model name", + display_name="Model Name", info=f"The embedding model to use for the selected provider. Each provider has a different set of models " - f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", + f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", required=True, ), + SecretStrInput( + name="provider_api_key", + display_name="Provider API Key", + info="An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.", + ), MessageTextInput( name="api_key_name", - display_name="API Key name", + display_name="Provider API Key Name", info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.", + advanced=True, ), DictInput( name="authentication", - display_name="Authentication parameters", + display_name="Authentication Parameters", is_list=True, advanced=True, ), - SecretStrInput( - name="provider_api_key", - display_name="Provider API Key", - info="An alternative to the Astra Authentication that let you use directly the API key of the provider.", - advanced=True, - ), DictInput( name="model_parameters", - display_name="Model parameters", + display_name="Model Parameters", advanced=True, is_list=True, ), @@ -96,7 +97,7 @@ def build_options(self) -> dict[str, Any]: if api_key_name: authentication["providerKey"] = api_key_name return { - # must match exactly astra CollectionVectorServiceOptions + # must match astrapy.info.CollectionVectorServiceOptions "collection_vector_service_options": { "provider": provider_value, "modelName": self.model_name, diff --git a/src/backend/base/langflow/components/vectorstores/AstraDB.py b/src/backend/base/langflow/components/vectorstores/AstraDB.py index d29dbd2358f..fe437dc67f5 100644 --- a/src/backend/base/langflow/components/vectorstores/AstraDB.py +++ b/src/backend/base/langflow/components/vectorstores/AstraDB.py @@ -116,6 +116,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): name="embedding", display_name="Embedding or Astra Vectorize", input_types=["Embeddings", "dict"], + info="Allows either an embedding model or an Astra Vectorize configuration. If Astra Vectorize is already configured for the collection, this field is not required.", ), StrInput( name="metadata_indexing_exclude", @@ -163,6 +164,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): def _build_vector_store_no_ingest(self): if self._cached_vectorstore: return self._cached_vectorstore + try: from langchain_astradb import AstraDBVectorStore from langchain_astradb.utils.astradb import SetupMode @@ -225,11 +227,6 @@ def _build_vector_store_no_ingest(self): return vector_store - def build_vector_store(self): - vector_store = self._build_vector_store_no_ingest() - self._add_documents_to_vector_store(vector_store) - return vector_store - def _add_documents_to_vector_store(self, vector_store): documents = [] for _input in self.ingest_data or []: @@ -255,6 +252,18 @@ def _map_search_type(self): else: return "similarity" + def _build_search_args(self): + args = { + "k": self.number_of_results, + "score_threshold": self.search_score_threshold, + } + + if self.search_filter: + clean_filter = {k: v for k, v in self.search_filter.items() if k and v} + if len(clean_filter) > 0: + args["filter"] = clean_filter + return args + def search_documents(self) -> list[Data]: vector_store = self._build_vector_store_no_ingest() self._add_documents_to_vector_store(vector_store) @@ -282,21 +291,14 @@ def search_documents(self) -> list[Data]: logger.debug("No search input provided. Skipping search.") return [] - def _build_search_args(self): - args = { - "k": self.number_of_results, - "score_threshold": self.search_score_threshold, - } - - if self.search_filter: - clean_filter = {k: v for k, v in self.search_filter.items() if k and v} - if len(clean_filter) > 0: - args["filter"] = clean_filter - return args - def get_retriever_kwargs(self): search_args = self._build_search_args() return { "search_type": self._map_search_type(), "search_kwargs": search_args, } + + def build_vector_store(self): + vector_store = self._build_vector_store_no_ingest() + self._add_documents_to_vector_store(vector_store) + return vector_store