Skip to content

Commit

Permalink
misc updates
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Sep 4, 2024
1 parent c5c2176 commit 163777e
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 17 deletions.
21 changes: 11 additions & 10 deletions example.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Smoke-test script for py_txi: runs TGI text generation across several GPU
configurations, then TEI embedding on the last configuration used."""

from py_txi.text_embedding_inference import TEI, TEIConfig
from py_txi.text_generation_inference import TGI, TGIConfig

# Exercise TGI with no GPU, a single GPU, and two GPUs.
for gpus in [None, "1", "1,2"]:
    llm = TGI(config=TGIConfig(model_id="bigscience/bloom-560m", gpus=gpus))
    output = llm.generate(["Hi, I'm a language model", "I'm fine, how are you?"])
    print(len(output))
    print("LLM:", output)
    # Stop this server before launching the next configuration.
    llm.close()

# NOTE(review): `gpus` here is the final loop value ("1,2"), so TEI runs on the
# last GPU configuration — confirm this reuse is intentional.
embed = TEI(config=TEIConfig(model_id="BAAI/bge-base-en-v1.5", gpus=gpus))
output = embed.encode(["Hi, I'm an embedding model", "I'm fine, how are you?"])
print(len(output))
print("Embed:", output)
embed.close()
6 changes: 3 additions & 3 deletions py_txi/text_embedding_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def __post_init__(self) -> None:

if self.image is None:
if is_nvidia_system() and self.gpus is not None:
LOGGER.info("\t+ Using the latest NVIDIA GPU image for Text-Embedding-Inference")
self.image = "ghcr.io/huggingface/text-embeddings-inference:latest"
LOGGER.info("\t+ Using latest NVIDIA CUDA GPU image for Text-Embedding-Inference")
self.image = "ghcr.io/huggingface/text-embeddings-inference:cuda-latest"
else:
LOGGER.info("\t+ Using version 1.4 image for Text-Embedding-Inference")
LOGGER.info("\t+ Using CPU image version 1.4 for Text-Embedding-Inference (before onnx backend)")
self.image = "ghcr.io/huggingface/text-embeddings-inference:cpu-1.4"

if is_nvidia_system() and "cpu" in self.image:
Expand Down
8 changes: 4 additions & 4 deletions py_txi/text_generation_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ def __post_init__(self) -> None:

if self.image is None:
if is_nvidia_system() and self.gpus is not None:
LOGGER.info("\t+ Using latest NVIDIA GPU image for Text-Generation-Inference")
LOGGER.info("\t+ Using latest NVIDIA CUDA GPU image for Text-Generation-Inference")
self.image = "ghcr.io/huggingface/text-generation-inference:latest"
elif is_rocm_system() and self.devices is not None:
LOGGER.info("\t+ Using latest ROCm AMD GPU image for Text-Generation-Inference")
LOGGER.info("\t+ Using latest AMD ROCm GPU image for Text-Generation-Inference")
self.image = "ghcr.io/huggingface/text-generation-inference:latest-rocm"
else:
LOGGER.info("\t+ Using version 1.4 image for Text-Generation-Inference (last image with CPU support)")
self.image = "ghcr.io/huggingface/text-generation-inference:1.4"
LOGGER.info("\t+ Using latest image for Text-Generation-Inference")
self.image = "ghcr.io/huggingface/text-generation-inference:latest"

if is_rocm_system() and "rocm" not in self.image:
LOGGER.warning("\t+ You are running on a ROCm AMD GPU system but using a non-ROCM image.")
Expand Down

0 comments on commit 163777e

Please sign in to comment.