-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
docs: small updates to astra vectorize docs #2497
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,37 +50,38 @@ class AstraVectorize(Component): | |
inputs = [ | ||
DropdownInput( | ||
name="provider", | ||
display_name="Provider name", | ||
display_name="Provider", | ||
options=VECTORIZE_PROVIDERS_MAPPING.keys(), | ||
value="", | ||
required=True, | ||
), | ||
MessageTextInput( | ||
name="model_name", | ||
display_name="Model name", | ||
display_name="Model Name", | ||
info=f"The embedding model to use for the selected provider. Each provider has a different set of models " | ||
f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", | ||
f"available (https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}", | ||
required=True, | ||
), | ||
SecretStrInput( | ||
name="provider_api_key", | ||
display_name="Provider API Key", | ||
info="An alternative to the Astra Authentication that passes an API key for the provider with each request to Astra DB. This may be used when Vectorize is configured for the collection, but no corresponding provider secret is stored within Astra's key management system.", | ||
), | ||
MessageTextInput( | ||
name="api_key_name", | ||
display_name="API Key name", | ||
display_name="Provider API Key Name", | ||
info="The name of the embeddings provider API key stored on Astra. If set, it will override the 'ProviderKey' in the authentication parameters.", | ||
advanced=True, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a common case where the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you mean? there's no default value. The value is defined by the user There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe it does default just to
But going through the UI flow, I see that you are correct in your other comment that the user-defined key name is required during creation. So I will undo these changes, and have |
||
), | ||
DictInput( | ||
name="authentication", | ||
display_name="Authentication parameters", | ||
display_name="Authentication Parameters", | ||
is_list=True, | ||
advanced=True, | ||
), | ||
SecretStrInput( | ||
name="provider_api_key", | ||
display_name="Provider API Key", | ||
info="An alternative to the Astra Authentication that let you use directly the API key of the provider.", | ||
advanced=True, | ||
), | ||
DictInput( | ||
name="model_parameters", | ||
display_name="Model parameters", | ||
display_name="Model Parameters", | ||
advanced=True, | ||
is_list=True, | ||
), | ||
|
@@ -96,7 +97,7 @@ def build_options(self) -> dict[str, Any]: | |
if api_key_name: | ||
authentication["providerKey"] = api_key_name | ||
return { | ||
# must match exactly astra CollectionVectorServiceOptions | ||
# must match astrapy.info.CollectionVectorServiceOptions | ||
"collection_vector_service_options": { | ||
"provider": provider_value, | ||
"modelName": self.model_name, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): | |
name="embedding", | ||
display_name="Embedding or Astra Vectorize", | ||
input_types=["Embeddings", "dict"], | ||
info="Allows either an embedding model or an Astra Vectorize configuration. If Astra Vectorize is already configured for the collection, this field is not required.", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you try what happens if you don't set it ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ugh, yeah. I guess that's something to figure out. It's required by langchain-astradb to have one or the other at the moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps we could loosen that constraint iff the setup mode is |
||
), | ||
StrInput( | ||
name="metadata_indexing_exclude", | ||
|
@@ -163,6 +164,7 @@ class AstraVectorStoreComponent(LCVectorStoreComponent): | |
def _build_vector_store_no_ingest(self): | ||
if self._cached_vectorstore: | ||
return self._cached_vectorstore | ||
|
||
try: | ||
from langchain_astradb import AstraDBVectorStore | ||
from langchain_astradb.utils.astradb import SetupMode | ||
|
@@ -225,11 +227,6 @@ def _build_vector_store_no_ingest(self): | |
|
||
return vector_store | ||
|
||
def build_vector_store(self): | ||
vector_store = self._build_vector_store_no_ingest() | ||
self._add_documents_to_vector_store(vector_store) | ||
return vector_store | ||
|
||
def _add_documents_to_vector_store(self, vector_store): | ||
documents = [] | ||
for _input in self.ingest_data or []: | ||
|
@@ -255,6 +252,18 @@ def _map_search_type(self): | |
else: | ||
return "similarity" | ||
|
||
def _build_search_args(self): | ||
args = { | ||
"k": self.number_of_results, | ||
"score_threshold": self.search_score_threshold, | ||
} | ||
|
||
if self.search_filter: | ||
clean_filter = {k: v for k, v in self.search_filter.items() if k and v} | ||
if len(clean_filter) > 0: | ||
args["filter"] = clean_filter | ||
return args | ||
|
||
def search_documents(self) -> list[Data]: | ||
vector_store = self._build_vector_store_no_ingest() | ||
self._add_documents_to_vector_store(vector_store) | ||
|
@@ -282,21 +291,14 @@ def search_documents(self) -> list[Data]: | |
logger.debug("No search input provided. Skipping search.") | ||
return [] | ||
|
||
def _build_search_args(self): | ||
args = { | ||
"k": self.number_of_results, | ||
"score_threshold": self.search_score_threshold, | ||
} | ||
|
||
if self.search_filter: | ||
clean_filter = {k: v for k, v in self.search_filter.items() if k and v} | ||
if len(clean_filter) > 0: | ||
args["filter"] = clean_filter | ||
return args | ||
|
||
def get_retriever_kwargs(self): | ||
search_args = self._build_search_args() | ||
return { | ||
"search_type": self._map_search_type(), | ||
"search_kwargs": search_args, | ||
} | ||
|
||
def build_vector_store(self): | ||
vector_store = self._build_vector_store_no_ingest() | ||
self._add_documents_to_vector_store(vector_store) | ||
return vector_store |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made this appear by default. The reasoning is that:
a) This is the quickest path to testing out Vectorize on a new collection.
b) If the collection is already configured for Vectorize, but the key was not stored in Astra, this is (by my guess) the most common pattern for authentication.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the most common pattern is to store keys in Astra; that's why I've put api_key_name on the front page.
If you go through the documentation, the steps are to import the key into Astra and refer to it when creating the collection.