From 9d16016439313cd37a1c24523142e1d740765593 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 25 Sep 2024 15:32:42 -0400 Subject: [PATCH] Include more information about the caller Currently, when making calls to AstraDB, we include a `User-Agent` header identifying the API caller. This header is set to "langchain" by default. This change threads the `caller_name` parameter throughout the API and appends a suffix identifying which class created the API client: * langchain/cache * langchain/chat_message_history * langchain/document_loader * langchain/graph_vectorstore * langchain/semantic_cache * langchain/vectorstore This metadata will be useful for debugging and tracking utilization of different library features. NOTE: This change alters the value of User-Agent headers, for example from `langchain/0.3.6` to `langchain/vectorstore/0.3.6` --- libs/astradb/langchain_astradb/cache.py | 6 ++++++ libs/astradb/langchain_astradb/chat_message_histories.py | 3 +++ libs/astradb/langchain_astradb/document_loaders.py | 3 +++ libs/astradb/langchain_astradb/graph_vectorstores.py | 2 ++ libs/astradb/langchain_astradb/utils/astradb.py | 4 +++- libs/astradb/langchain_astradb/vectorstores.py | 3 +++ 6 files changed, 20 insertions(+), 1 deletion(-) diff --git a/libs/astradb/langchain_astradb/cache.py b/libs/astradb/langchain_astradb/cache.py index adc1b69..9d28dd7 100644 --- a/libs/astradb/langchain_astradb/cache.py +++ b/libs/astradb/langchain_astradb/cache.py @@ -109,6 +109,7 @@ def __init__( namespace: str | None = None, pre_delete_collection: bool = False, setup_mode: SetupMode = SetupMode.SYNC, + caller_name: str = "langchain/cache", ): """Cache that uses Astra DB as a backend. @@ -148,6 +149,7 @@ def __init__( pre_delete_collection: whether to delete the collection before creating it. If False and the collection already exists, the collection will be used as is. + caller_name: a name used to identify API calls in the User-Agent header. 
""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, @@ -159,6 +161,7 @@ def __init__( namespace=namespace, setup_mode=setup_mode, pre_delete_collection=pre_delete_collection, + caller_name=caller_name, ) self.collection = self.astra_env.collection self.async_collection = self.astra_env.async_collection @@ -326,6 +329,7 @@ def __init__( embedding: Embeddings, metric: str | None = None, similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, + caller_name: str = "langchain/semantic_cache", ): """Astra DB semantic cache. @@ -375,6 +379,7 @@ def __init__( Defaults to 'cosine' (alternatives: 'euclidean', 'dot_product') similarity_threshold: the minimum similarity for accepting a (semantic-search) match. + caller_name: a name used to identify API calls in the User-Agent header. """ self.embedding = embedding self.metric = metric @@ -416,6 +421,7 @@ async def _acache_embedding(text: str) -> list[float]: pre_delete_collection=pre_delete_collection, embedding_dimension=embedding_dimension, metric=metric, + caller_name=caller_name, ) self.collection = self.astra_env.collection self.async_collection = self.astra_env.async_collection diff --git a/libs/astradb/langchain_astradb/chat_message_histories.py b/libs/astradb/langchain_astradb/chat_message_histories.py index 1b71707..f6fa572 100644 --- a/libs/astradb/langchain_astradb/chat_message_histories.py +++ b/libs/astradb/langchain_astradb/chat_message_histories.py @@ -40,6 +40,7 @@ def __init__( namespace: str | None = None, setup_mode: SetupMode = SetupMode.SYNC, pre_delete_collection: bool = False, + caller_name: str = "langchain/chat_message_history", ) -> None: """Chat message history that stores history in Astra DB. @@ -73,6 +74,7 @@ def __init__( setup_mode: mode used to create the Astra DB collection (SYNC, ASYNC or OFF). pre_delete_collection: whether to delete the collection. + caller_name: a name used to identify API calls in the User-Agent header. 
""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, @@ -84,6 +86,7 @@ def __init__( namespace=namespace, setup_mode=setup_mode, pre_delete_collection=pre_delete_collection, + caller_name=caller_name, ) self.collection = self.astra_env.collection diff --git a/libs/astradb/langchain_astradb/document_loaders.py b/libs/astradb/langchain_astradb/document_loaders.py index 37ee41d..04fa6b1 100644 --- a/libs/astradb/langchain_astradb/document_loaders.py +++ b/libs/astradb/langchain_astradb/document_loaders.py @@ -49,6 +49,7 @@ def __init__( nb_prefetched: int = _NOT_SET, # type: ignore[assignment] page_content_mapper: Callable[[dict], str] = json.dumps, metadata_mapper: Callable[[dict], dict[str, Any]] | None = None, + caller_name: str = "langchain/document_loader", ) -> None: """Load DataStax Astra DB documents. @@ -91,6 +92,7 @@ def __init__( metadata_mapper: Function applied to collection documents to create the `metadata` of the LangChain Document. Defaults to returning the namespace, API endpoint and collection name. + caller_name: a name used to identify API calls in the User-Agent header. 
""" astra_db_env = _AstraDBCollectionEnvironment( collection_name=collection_name, @@ -101,6 +103,7 @@ def __init__( async_astra_db_client=async_astra_db_client, namespace=namespace, setup_mode=SetupMode.OFF, + caller_name=caller_name, ) self.astra_db_env = astra_db_env self.filter = filter_criteria diff --git a/libs/astradb/langchain_astradb/graph_vectorstores.py b/libs/astradb/langchain_astradb/graph_vectorstores.py index d796915..1d0601d 100644 --- a/libs/astradb/langchain_astradb/graph_vectorstores.py +++ b/libs/astradb/langchain_astradb/graph_vectorstores.py @@ -54,6 +54,7 @@ def __init__( metadata_indexing_include: Iterable[str] | None = None, metadata_indexing_exclude: Iterable[str] | None = None, collection_indexing_policy: dict[str, Any] | None = None, + caller_name: str = "langchain/graph_vectorstore", **kwargs: Any, ): """Create a new Graph Vector Store backed by AstraDB.""" @@ -68,6 +69,7 @@ def __init__( metadata_indexing_include=metadata_indexing_include, metadata_indexing_exclude=metadata_indexing_exclude, collection_indexing_policy=collection_indexing_policy, + caller_name=caller_name, **kwargs, ) diff --git a/libs/astradb/langchain_astradb/utils/astradb.py b/libs/astradb/langchain_astradb/utils/astradb.py index ae7d332..e5c285a 100644 --- a/libs/astradb/langchain_astradb/utils/astradb.py +++ b/libs/astradb/langchain_astradb/utils/astradb.py @@ -94,6 +94,7 @@ def __init__( astra_db_client: AstraDB | None = None, async_astra_db_client: AsyncAstraDB | None = None, namespace: str | None = None, + caller_name: str = "langchain", ) -> None: self.token: str | TokenProvider | None self.api_endpoint: str | None @@ -221,7 +222,6 @@ def __init__( raise ValueError(msg) # create the clients - caller_name = "langchain" caller_version = getattr(langchain_core, "__version__", None) self.data_api_client = DataAPIClient( @@ -256,6 +256,7 @@ def __init__( default_indexing_policy: dict[str, Any] | None = None, collection_vector_service_options: 
CollectionVectorServiceOptions | None = None, collection_embedding_api_key: str | EmbeddingHeadersProvider | None = None, + caller_name: str = "langchain", ) -> None: super().__init__( token=token, @@ -264,6 +265,7 @@ def __init__( astra_db_client=astra_db_client, async_astra_db_client=async_astra_db_client, namespace=namespace, + caller_name=caller_name, ) self.collection_name = collection_name self.collection = self.database.get_collection( diff --git a/libs/astradb/langchain_astradb/vectorstores.py b/libs/astradb/langchain_astradb/vectorstores.py index 2ef136d..53a0f11 100644 --- a/libs/astradb/langchain_astradb/vectorstores.py +++ b/libs/astradb/langchain_astradb/vectorstores.py @@ -389,6 +389,7 @@ def __init__( content_field: str | None = None, ignore_invalid_documents: bool = False, autodetect_collection: bool = False, + caller_name: str = "langchain/vectorstore", ) -> None: """Wrapper around DataStax Astra DB for vector-store workloads. @@ -497,6 +498,7 @@ def __init__( ``metric``, ``setup_mode``, ``metadata_indexing_include``, ``metadata_indexing_exclude``, ``collection_indexing_policy``, ``collection_vector_service_options``. + caller_name: a name used to identify API calls in the User-Agent header. Note: For concurrency in synchronous :meth:`~add_texts`:, as a rule of thumb, @@ -664,6 +666,7 @@ def __init__( default_indexing_policy=DEFAULT_INDEXING_OPTIONS, collection_vector_service_options=self.collection_vector_service_options, collection_embedding_api_key=self.collection_embedding_api_key, + caller_name=caller_name, ) def _get_safe_embedding(self) -> Embeddings: