From 0491854298532fec1ba7e66660b8a90a1af88b0a Mon Sep 17 00:00:00 2001
From: Parshva Daftari <89991302+parshvadaftari@users.noreply.github.com>
Date: Wed, 25 Sep 2024 20:04:40 +0530
Subject: [PATCH] Fixing the bug when using Huggingface Models (#1877)

Co-authored-by: parshvadaftari
---
 mem0/embeddings/huggingface.py                 |  2 +-
 .../embeddings/test_huggingface_embeddings.py  | 20 +++++++++-----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/mem0/embeddings/huggingface.py b/mem0/embeddings/huggingface.py
index 6bfd5582c4..56d6a0725b 100644
--- a/mem0/embeddings/huggingface.py
+++ b/mem0/embeddings/huggingface.py
@@ -28,4 +28,4 @@ def embed(self, text):
         Returns:
             list: The embedding vector.
         """
-        return self.model.encode(text)
+        return self.model.encode(text, convert_to_numpy=True).tolist()
diff --git a/tests/embeddings/test_huggingface_embeddings.py b/tests/embeddings/test_huggingface_embeddings.py
index de6f5852e0..980c1d6abd 100644
--- a/tests/embeddings/test_huggingface_embeddings.py
+++ b/tests/embeddings/test_huggingface_embeddings.py
@@ -1,5 +1,6 @@
 import pytest
 from unittest.mock import Mock, patch
+import numpy as np
 
 from mem0.embeddings.huggingface import HuggingFaceEmbedding
 from mem0.configs.embeddings.base import BaseEmbedderConfig
@@ -16,11 +17,10 @@ def test_embed_default_model(mock_sentence_transformer):
     config = BaseEmbedderConfig()
     embedder = HuggingFaceEmbedding(config)
 
-    mock_sentence_transformer.encode.return_value = [0.1, 0.2, 0.3]
+    mock_sentence_transformer.encode.return_value = np.array([0.1, 0.2, 0.3])
     result = embedder.embed("Hello world")
 
-    mock_sentence_transformer.encode.assert_called_once_with("Hello world")
-
+    mock_sentence_transformer.encode.assert_called_once_with("Hello world", convert_to_numpy=True)
     assert result == [0.1, 0.2, 0.3]
 
 
@@ -28,11 +28,10 @@ def test_embed_custom_model(mock_sentence_transformer):
     config = BaseEmbedderConfig(model="paraphrase-MiniLM-L6-v2")
     embedder = HuggingFaceEmbedding(config)
 
-    mock_sentence_transformer.encode.return_value = [0.4, 0.5, 0.6]
+    mock_sentence_transformer.encode.return_value = np.array([0.4, 0.5, 0.6])
     result = embedder.embed("Custom model test")
 
-    mock_sentence_transformer.encode.assert_called_once_with("Custom model test")
-
+    mock_sentence_transformer.encode.assert_called_once_with("Custom model test", convert_to_numpy=True)
     assert result == [0.4, 0.5, 0.6]
 
 
@@ -40,11 +39,10 @@ def test_embed_with_model_kwargs(mock_sentence_transformer):
     config = BaseEmbedderConfig(model="all-MiniLM-L6-v2", model_kwargs={"device": "cuda"})
     embedder = HuggingFaceEmbedding(config)
 
-    mock_sentence_transformer.encode.return_value = [0.7, 0.8, 0.9]
+    mock_sentence_transformer.encode.return_value = np.array([0.7, 0.8, 0.9])
     result = embedder.embed("Test with device")
 
-    mock_sentence_transformer.encode.assert_called_once_with("Test with device")
-
+    mock_sentence_transformer.encode.assert_called_once_with("Test with device", convert_to_numpy=True)
     assert result == [0.7, 0.8, 0.9]
 
 
@@ -62,10 +60,10 @@ def test_embed_with_custom_embedding_dims(mock_sentence_transformer):
     config = BaseEmbedderConfig(model="all-mpnet-base-v2", embedding_dims=768)
     embedder = HuggingFaceEmbedding(config)
 
-    mock_sentence_transformer.encode.return_value = [1.0, 1.1, 1.2]
+    mock_sentence_transformer.encode.return_value = np.array([1.0, 1.1, 1.2])
     result = embedder.embed("Custom embedding dims")
 
-    mock_sentence_transformer.encode.assert_called_once_with("Custom embedding dims")
+    mock_sentence_transformer.encode.assert_called_once_with("Custom embedding dims", convert_to_numpy=True)
     assert embedder.config.embedding_dims == 768
 
 