From 5146d2c1d1bdff3bed65d3022af78428b8bf6a9b Mon Sep 17 00:00:00 2001
From: Micko <mickolesmana@gmail.com>
Date: Tue, 1 Oct 2024 18:54:15 +0700
Subject: [PATCH 1/5] add 0.1.0 opensearch Embedding for langchain community

---
 .../embeddings/test_opensearch.py             | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 libs/community/tests/integration_tests/embeddings/test_opensearch.py

diff --git a/libs/community/tests/integration_tests/embeddings/test_opensearch.py b/libs/community/tests/integration_tests/embeddings/test_opensearch.py
new file mode 100644
index 0000000000000..c60d7ef8218ba
--- /dev/null
+++ b/libs/community/tests/integration_tests/embeddings/test_opensearch.py
@@ -0,0 +1,49 @@
+import pytest
+from opensearchpy import OpenSearch
+from langchain_community.embeddings.opensearch import OpenSearchEmbedding
+
+
+@pytest.fixture
+def model_id() -> str:
+    """Fixture to provide model ID."""
+    return "some-model-id"
+
+
+@pytest.fixture
+def opensearch_client() -> OpenSearch:
+    """Fixture to provide OpenSearch client connection."""
+    return OpenSearch(
+        hosts=[{'host': "localhost", 'port': 9200}],  # Remove sensitive info
+        http_auth=("username", "password"),  # Remove sensitive info
+        use_ssl=True,
+        verify_certs=False
+    )
+
+
+@pytest.fixture
+def opensearch_embedding(opensearch_client, model_id) -> OpenSearchEmbedding:
+    return OpenSearchEmbedding.from_opensearch_connection(opensearch_client, model_id)
+
+
+def test_opensearch_embedding_documents(opensearch_embedding: OpenSearchEmbedding) -> None:
+    """
+    Test OpenSearch embedding documents.
+    Convert a list of strings, into a list of floats with the shape of its element and its
+    embedding vector dimensions.
+    """
+    documents = ["foo bar", "bar foo", "foo"]
+    output = opensearch_embedding.embed_documents(documents)
+    assert len(output) == 3
+    assert len(output[0]) == 768  # Change 768 to the expected embedding size
+    assert len(output[1]) == 768  # Change 768 to the expected embedding size
+    assert len(output[2]) == 768  # Change 768 to the expected embedding size
+
+
+def test_opensearch_embedding_query(opensearch_embedding: OpenSearchEmbedding) -> None:
+    """
+    Test OpenSearch embedding documents.
+    Convert strings, into floats with the shape of its embedding vector dimensions.
+    """
+    document = "foo bar"
+    output = opensearch_embedding.embed_query(document)
+    assert len(output) == 768

From 13af9ab8022ceb14df252d0b400fa792111cee22 Mon Sep 17 00:00:00 2001
From: Micko <mickolesmana@gmail.com>
Date: Tue, 1 Oct 2024 18:59:33 +0700
Subject: [PATCH 2/5] add 0.1.0 opensearch Embedding for langchain community

---
 .../embeddings/opensearch.py                  | 88 +++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 libs/community/langchain_community/embeddings/opensearch.py

diff --git a/libs/community/langchain_community/embeddings/opensearch.py b/libs/community/langchain_community/embeddings/opensearch.py
new file mode 100644
index 0000000000000..fdb771a0cf16b
--- /dev/null
+++ b/libs/community/langchain_community/embeddings/opensearch.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, List
+
+if TYPE_CHECKING:
+    from opensearchpy import OpenSearch
+
+import json
+from langchain_core.embeddings import Embeddings
+
+class OpenSearchEmbedding(Embeddings):
+    def __init__(
+        self,
+        client: OpenSearch,
+        model_id: str,
+    ):
+        self.client = client
+        self.model_id = model_id
+
+    @classmethod
+    def from_opensearch_connection(
+        cls,
+        opensearch_connection: OpenSearch,
+        model_id: str,
+    ) -> OpenSearchEmbedding:
+        """
+        Class method to create an OpenSearchEmbedding object from an OpenSearch connection.
+
+        Args:
+            opensearch_connection (OpenSearch): The OpenSearch connection.
+            model_id (str): The ML model ID for generating embeddings.
+            input_field (str, optional): The input field for the text (default: 'text_field').
+
+        Returns:
+            OpenSearchEmbedding: An instance of the OpenSearchEmbedding class.
+        """
+        return cls(opensearch_connection, model_id)
+
+    def _embedding_func(self, texts: List[str]) -> List[List[float]]:
+        """
+        Internal method that sends a request to OpenSearch's text embedding endpoint
+        and retrieves embeddings for the provided texts.
+
+        Args:
+            texts (List[str]): A list of strings to be embedded.
+
+        Returns:
+            List[List[float]]: A list of embeddings, where each embedding is a list of floats.
+        """
+        endpoint = f"/_plugins/_ml/_predict/text_embedding/{self.model_id}"
+        body = {
+            "text_docs": texts,
+            "return_number": True,
+            "target_response": ["sentence_embedding"]
+        }
+
+        response = self.client.transport.perform_request(
+            method="POST",
+            url=endpoint,
+            body=json.dumps(body),
+        )
+        # Extract embeddings from the response
+        embeddings = [item['output'][0]['data'] for item in response['inference_results']]
+        return embeddings
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for a list of documents.
+
+        Args:
+            texts (List[str]): A list of text documents to embed.
+
+        Returns:
+            List[List[float]]: A list of embeddings for each document.
+        """
+        return self._embedding_func(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Generate an embedding for a single query.
+
+        Args:
+            text (str): The text query to embed.
+
+        Returns:
+            List[float]: The embedding for the query.
+        """
+        return self._embedding_func([text])[0]

From 7feca1688cf0a15592e3ea9c21783df3803cc44f Mon Sep 17 00:00:00 2001
From: Micko <mickolesmana@gmail.com>
Date: Tue, 1 Oct 2024 22:22:36 +0700
Subject: [PATCH 3/5] Fix lint and formatting in opensearch and its test

---
 .../embeddings/opensearch.py                  | 31 ++++++------
 .../embeddings/test_opensearch.py             | 47 ++++++++++++-------
 2 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/libs/community/langchain_community/embeddings/opensearch.py b/libs/community/langchain_community/embeddings/opensearch.py
index fdb771a0cf16b..498813e53af92 100644
--- a/libs/community/langchain_community/embeddings/opensearch.py
+++ b/libs/community/langchain_community/embeddings/opensearch.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import json
 from typing import TYPE_CHECKING, List
 
+from langchain_core.embeddings import Embeddings
+
 if TYPE_CHECKING:
     from opensearchpy import OpenSearch
 
-import json
-from langchain_core.embeddings import Embeddings
 
-class OpenSearchEmbedding(Embeddings):
+class OpenSearchEmbeddings(Embeddings):
     def __init__(
         self,
         client: OpenSearch,
@@ -18,40 +19,41 @@ def __init__(
         self.model_id = model_id
 
     @classmethod
-    def from_opensearch_connection(
+    def from_connection(
         cls,
         opensearch_connection: OpenSearch,
         model_id: str,
-    ) -> OpenSearchEmbedding:
+    ) -> OpenSearchEmbeddings:
         """
-        Class method to create an OpenSearchEmbedding object from an OpenSearch connection.
+        Class method to create an OpenSearchEmbeddings object
+        from an OpenSearch connection.
 
         Args:
             opensearch_connection (OpenSearch): The OpenSearch connection.
             model_id (str): The ML model ID for generating embeddings.
-            input_field (str, optional): The input field for the text (default: 'text_field').
 
         Returns:
-            OpenSearchEmbedding: An instance of the OpenSearchEmbedding class.
+            OpenSearchEmbeddings: An instance of the OpenSearchEmbeddings class.
         """
         return cls(opensearch_connection, model_id)
 
     def _embedding_func(self, texts: List[str]) -> List[List[float]]:
         """
-        Internal method that sends a request to OpenSearch's text embedding endpoint
-        and retrieves embeddings for the provided texts.
+        Internal method that sends a request to OpenSearch's text
+        embedding endpoint and retrieves embeddings for the provided texts.
 
         Args:
             texts (List[str]): A list of strings to be embedded.
 
         Returns:
-            List[List[float]]: A list of embeddings, where each embedding is a list of floats.
+            List[List[float]]: A list of embeddings,
+            where each embedding is a list of floats.
         """
         endpoint = f"/_plugins/_ml/_predict/text_embedding/{self.model_id}"
         body = {
             "text_docs": texts,
             "return_number": True,
-            "target_response": ["sentence_embedding"]
+            "target_response": ["sentence_embedding"],
         }
 
         response = self.client.transport.perform_request(
@@ -59,8 +61,9 @@ def _embedding_func(self, texts: List[str]) -> List[List[float]]:
             url=endpoint,
             body=json.dumps(body),
         )
-        # Extract embeddings from the response
-        embeddings = [item['output'][0]['data'] for item in response['inference_results']]
+        embeddings = [
+            item["output"][0]["data"] for item in response["inference_results"]
+        ]
         return embeddings
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
diff --git a/libs/community/tests/integration_tests/embeddings/test_opensearch.py b/libs/community/tests/integration_tests/embeddings/test_opensearch.py
index c60d7ef8218ba..98ceb332df035 100644
--- a/libs/community/tests/integration_tests/embeddings/test_opensearch.py
+++ b/libs/community/tests/integration_tests/embeddings/test_opensearch.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import pytest
 from opensearchpy import OpenSearch
-from langchain_community.embeddings.opensearch import OpenSearchEmbedding
+
+from langchain_community.embeddings.opensearch import OpenSearchEmbeddings
 
 
 @pytest.fixture
@@ -10,40 +13,48 @@ def model_id() -> str:
 
 
 @pytest.fixture
-def opensearch_client() -> OpenSearch:
+def client() -> OpenSearch:
     """Fixture to provide OpenSearch client connection."""
     return OpenSearch(
-        hosts=[{'host': "localhost", 'port': 9200}],  # Remove sensitive info
+        hosts=[{"host": "localhost", "port": 9200}],  # Remove sensitive info
         http_auth=("username", "password"),  # Remove sensitive info
         use_ssl=True,
-        verify_certs=False
+        verify_certs=False,
     )
 
 
 @pytest.fixture
-def opensearch_embedding(opensearch_client, model_id) -> OpenSearchEmbedding:
-    return OpenSearchEmbedding.from_opensearch_connection(opensearch_client, model_id)
+def opensearch_embedding(client: OpenSearch, model_id: str) -> OpenSearchEmbeddings:
+    """Fixture to provide OpenSearch embeddings connection."""
+    return OpenSearchEmbeddings.from_connection(client, model_id)
+
+
+@pytest.fixture
+def documents() -> List[str]:
+    """Fixture for test documents."""
+    return ["foo bar", "bar foo", "foo"]
 
 
-def test_opensearch_embedding_documents(opensearch_embedding: OpenSearchEmbedding) -> None:
+def test_opensearch_embedding_documents(
+    opensearch_embedding: OpenSearchEmbeddings, documents: List[str]
+) -> None:
     """
     Test OpenSearch embedding documents.
-    Convert a list of strings, into a list of floats with the shape of its element and its
-    embedding vector dimensions.
+    Convert a list of strings into a list of embedding vectors,
+    with each vector having the expected embedding dimension.
     """
-    documents = ["foo bar", "bar foo", "foo"]
     output = opensearch_embedding.embed_documents(documents)
-    assert len(output) == 3
-    assert len(output[0]) == 768  # Change 768 to the expected embedding size
-    assert len(output[1]) == 768  # Change 768 to the expected embedding size
-    assert len(output[2]) == 768  # Change 768 to the expected embedding size
+    assert len(output) == len(documents)
+    for embedding in output:
+        assert len(embedding) == 768  # Expected embedding size
 
 
-def test_opensearch_embedding_query(opensearch_embedding: OpenSearchEmbedding) -> None:
+def test_opensearch_embedding_query(opensearch_embedding: OpenSearchEmbeddings) -> None:
     """
-    Test OpenSearch embedding documents.
-    Convert strings, into floats with the shape of its embedding vector dimensions.
+    Test OpenSearch embedding query.
+    Convert a string into a float array, with the shape
+    corresponding to its embedding vector dimensions.
     """
     document = "foo bar"
     output = opensearch_embedding.embed_query(document)
-    assert len(output) == 768
+    assert len(output) == 768  # Expected embedding size

From 74e07ce4f94cb6e327aa95118301e5584c449cea Mon Sep 17 00:00:00 2001
From: Micko <mickolesmana@gmail.com>
Date: Tue, 1 Oct 2024 22:34:48 +0700
Subject: [PATCH 4/5] add opensearch.ipynb to docs/docs/integrations/text_embedding

---
 .../text_embedding/opensearch.ipynb           | 148 ++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 docs/docs/integrations/text_embedding/opensearch.ipynb

diff --git a/docs/docs/integrations/text_embedding/opensearch.ipynb b/docs/docs/integrations/text_embedding/opensearch.ipynb
new file mode 100644
index 0000000000000..da37d568d1b7c
--- /dev/null
+++ b/docs/docs/integrations/text_embedding/opensearch.ipynb
@@ -0,0 +1,148 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "766a731c-fdc8-43a9-ab6a-aae8b3d82720",
+   "metadata": {},
+   "source": [
+    "# OpenSearch\n",
+    "\n",
+    "A guide to using embeddings with OpenSearch ML Plugins. Ensure that your OpenSearch cluster has the embedding plugins installed.\n",
+    "\n",
+    "For more information, visit: https://opensearch.org/docs/latest/ml-commons-plugin/pretrained-models/#sentence-transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b0f624d-b469-4974-acd0-a8c8b74b5f48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.embeddings.opensearch import OpenSearchEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "213122f3-169e-4fa6-99cb-c8a3bc77aff8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the OpenSearch client using opensearchpy\n",
+    "from opensearchpy import OpenSearch\n",
+    "\n",
+    "client = OpenSearch(\n",
+    "        hosts=[{'host': \"localhost\", 'port': 9200}],\n",
+    "        http_auth=(\"username\", \"password\"),\n",
+    "        use_ssl=True,\n",
+    "        verify_certs=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "029f030e-06b0-40ec-8848-f1a91c8762f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_id = \"embedding_model_id\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "013eabdf-9fbf-41f9-a932-7b580f2ece49",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OpenSearchEmbeddings.from_opensearch_connection(opensearch_client, model_id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c34d540-f642-48ef-ba10-be3f8948b6c7",
+   "metadata": {},
+   "source": [
+    "### Embedding documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8432efd-6315-4dcf-92a4-1a772c5caa9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "documents = [\"Foo\", \"Bar\", \"Foo Bar\"]\n",
+    "embedded_documents = embeddings.embed_documents(documents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "829001a3-2213-4eb3-9942-ee2583ff5577",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i, doc in enumerate(documents):\n",
+    "    print(f\"Document: {doc}\")\n",
+    "    print(f\"Embedding: {embedded_documents[i][:5]}...\")  # Show first 5 values to avoid overwhelming output\n",
+    "    print(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b831983-1c2f-4b75-a36e-e1fea374cb1c",
+   "metadata": {},
+   "source": [
+    "### Embedding a query"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c13f2f2-2357-4e31-a432-9a34d70bcc9a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query = \"Hello World!\"\n",
+    "embedded_query = embeddings.embed_query(query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ad525df-e411-4f9c-a796-f8c388b21d7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Query Embedding:\")\n",
+    "print(f\"Query: {query}\")\n",
+    "print(f\"Embedding: {embedded_query[:5]}...\")  # Show first 5 values of the embedding"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "vllm-langchain",
+   "language": "python",
+   "name": "vllm-langchain"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 742e1fc6b6de262368f430ac289865a94eb59f2a Mon Sep 17 00:00:00 2001
From: Micko <mickolesmana@gmail.com>
Date: Thu, 3 Oct 2024 14:39:02 +0700
Subject: [PATCH 5/5] fixing error from_opensearch_connection to
 from_connection

---
 docs/docs/integrations/text_embedding/opensearch.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/integrations/text_embedding/opensearch.ipynb b/docs/docs/integrations/text_embedding/opensearch.ipynb
index da37d568d1b7c..fb5aea1904217 100644
--- a/docs/docs/integrations/text_embedding/opensearch.ipynb
+++ b/docs/docs/integrations/text_embedding/opensearch.ipynb
@@ -57,7 +57,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "embeddings = OpenSearchEmbeddings.from_opensearch_connection(opensearch_client, model_id)"
+    "embeddings = OpenSearchEmbeddings.from_connection(client, model_id)"
    ]
   },
   {