From 6ba8a5a1e4a886ebe44af90d9ce3eb842b24b697 Mon Sep 17 00:00:00 2001 From: Javier Puerto Date: Sun, 17 Mar 2024 11:21:07 +0100 Subject: [PATCH 1/3] Add support for Text Embeddings Inference (TEI). --- src/initialize.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/initialize.py b/src/initialize.py index 42dd93b..63e2c15 100644 --- a/src/initialize.py +++ b/src/initialize.py @@ -6,7 +6,7 @@ import yaml from langchain.chains import RetrievalQA from langchain.chat_models import ChatOpenAI -from langchain.embeddings import HuggingFaceEmbeddings +from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceHubEmbeddings from langchain.prompts import ( ChatPromptTemplate, HumanMessagePromptTemplate, @@ -82,10 +82,7 @@ def _init_vector_store_pinecone(config_loader): ) index_name = config_loader["vector_store_index_name"] index = pinecone.Index(index_name) - embeddings = HuggingFaceEmbeddings( - model_name=config_loader["embeddings_model_name"], - model_kwargs={"device": "cpu"}, - ) + embeddings = _init_embeddings(config_loader=config_loader) vector_store = Pinecone(index, embeddings.embed_query, "text") logger.info(pinecone.describe_index(index_name)) logger.info(index.describe_index_stats()) @@ -103,10 +100,7 @@ def _init_vector_store_supabase(config_loader): supabase_key=os.environ.get("SUPABASE_API_KEY"), options=ClientOptions(postgrest_client_timeout=60), ) - embeddings = HuggingFaceEmbeddings( - model_name=config_loader["embeddings_model_name"], - model_kwargs={"device": "cpu"}, - ) + embeddings = _init_embeddings(config_loader) vector_store = StandardSupabaseVectorStore( client=supabase_client, embedding=embeddings, @@ -116,7 +110,6 @@ def _init_vector_store_supabase(config_loader): logger.info("Initialized vector store") return vector_store - def _init_vector_stores_qdrant(config_loader): logger = lg.getLogger(_init_vector_stores_qdrant.__name__) logger.info("Initializing vector stores") @@ -125,10 
+118,7 @@ def _init_vector_stores_qdrant(config_loader): api_key=os.environ["QDRANT_API_KEY"], prefer_grpc=True, ) - embeddings = HuggingFaceEmbeddings( - model_name=config_loader["embeddings_model_name"], - model_kwargs={"device": "cpu"}, - ) + embeddings = _init_embeddings(config_loader) vector_stores = {} for collection_name in config_loader["collections"]: if not _exists_collection(qdrant_client, collection_name): @@ -145,6 +135,16 @@ def _init_vector_stores_qdrant(config_loader): logger.info("Initialized vector store for collection [%s]", collection_name) return vector_stores +def _init_embeddings(config_loader): + model: str = config_loader["embeddings_model_name"] + if model.startswith('http'): + return HuggingFaceHubEmbeddings(model=model) + else: + return HuggingFaceEmbeddings( + model_name=config_loader["embeddings_model_name"], + model_kwargs={"device": "cpu"}, + ) + def _init_openai_client(): logger = lg.getLogger(_init_openai_client.__name__) From ac9a5e447c60cde58cbe4f02dd84567f9b619824 Mon Sep 17 00:00:00 2001 From: Javier Puerto Date: Sun, 17 Mar 2024 12:30:29 +0100 Subject: [PATCH 2/3] Update requirements and documentation. --- doc/use_tei_for_embeddings.md | 27 +++++++++++++++++++++++++++ requirements.txt | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 doc/use_tei_for_embeddings.md diff --git a/doc/use_tei_for_embeddings.md b/doc/use_tei_for_embeddings.md new file mode 100644 index 0000000..a067764 --- /dev/null +++ b/doc/use_tei_for_embeddings.md @@ -0,0 +1,27 @@ +# Use Text Embeddings Inference (TEI) support + +TEI is an optimized toolkit for deploying and serving text embeddings and sequence classification models. + +See more information in the [TEI documentation](https://huggingface.co/docs/text-embeddings-inference/index). + +**Current limitation:** The `chunk_size` option must be 510 or lower to work. I was not able to configure a higher size. 
+ +## How to use + +It is simple: just run a Docker image suitable for your [compatible hardware](https://huggingface.co/docs/text-embeddings-inference/supported_models), like the following: + +```shell +docker run --gpus all -e HUGGING_FACE_HUB_TOKEN= -p 8080:80 -v :/data ghcr.io/huggingface/text-embeddings-inference:turing-1.1 --model-id dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn --max-client-batch-size 64 +``` + +The previous command will start a new service with the model `dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn` ready to generate embeddings. + +In justicio's configuration, limit `chunk_size` to 510 and change the `embeddings_model_name` to the URL where the TEI service is running and listening, like *http://localhost:8080*. + +You will need to set an environment variable with the HF token in the environment where justicio is running. + +```shell +HUGGINGFACEHUB_API_TOKEN= python -m src.etls.boja.load dates 2024/01/01 2024/01/31 +``` + +Embeddings will be generated using TEI and embedded into the configured vector database (only tested with Qdrant). 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index dab0ff7..1617773 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,9 +13,9 @@ retry==0.9.2 typer==0.9.0 schedule==1.2.1 -langchain==0.0.305 +langchain==0.0.354 # langchainplus-sdk==0.0.20 -langsmith==0.0.41 +langsmith==0.0.92 qdrant-client==1.5.4 supabase==1.0.2 pinecone-client==2.2.2 From 3552d329aebdb6b453c6e477347455126df709a4 Mon Sep 17 00:00:00 2001 From: Javier Puerto Date: Sun, 17 Mar 2024 15:34:31 +0100 Subject: [PATCH 3/3] Update src/initialize.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Darío López Padial --- src/initialize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/initialize.py b/src/initialize.py index 63e2c15..16e03e0 100644 --- a/src/initialize.py +++ b/src/initialize.py @@ -141,7 +141,7 @@ def _init_embeddings(config_loader): return HuggingFaceHubEmbeddings(model=model) else: return HuggingFaceEmbeddings( - model_name=config_loader["embeddings_model_name"], + model_name=model, model_kwargs={"device": "cpu"}, )