From df65759d39096250ae8dd198ad647a1419995fcc Mon Sep 17 00:00:00 2001
From: B-Step62 <yuki.watanabe@databricks.com>
Date: Mon, 23 Sep 2024 21:50:43 +0900
Subject: [PATCH] Add integration test for vectorstore

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
---
 .../integration_tests/test_chat_models.py     | 11 +++
 .../integration_tests/test_embeddings.py      | 12 +++
 .../integration_tests/test_vectorstore.py     | 73 +++++++++++++++++++
 3 files changed, 96 insertions(+)
 create mode 100644 libs/databricks/tests/integration_tests/test_vectorstore.py

diff --git a/libs/databricks/tests/integration_tests/test_chat_models.py b/libs/databricks/tests/integration_tests/test_chat_models.py
index d517267..6c995de 100644
--- a/libs/databricks/tests/integration_tests/test_chat_models.py
+++ b/libs/databricks/tests/integration_tests/test_chat_models.py
@@ -1,3 +1,14 @@
+"""
+This file contains the integration test for ChatDatabricks class.
+
+The integration tests run nightly on a trusted CI/CD system defined in a
+private repository, so that they can be executed securely. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the ChatDatabricks implementation and you think that you
+need to update the corresponding integration test, please contact the
+maintainers of the repository to verify the changes.
+"""
+
 from typing import Annotated
 from unittest import mock
 
diff --git a/libs/databricks/tests/integration_tests/test_embeddings.py b/libs/databricks/tests/integration_tests/test_embeddings.py
index d0c6b10..d871cd5 100644
--- a/libs/databricks/tests/integration_tests/test_embeddings.py
+++ b/libs/databricks/tests/integration_tests/test_embeddings.py
@@ -1,7 +1,19 @@
+"""
+This file contains the integration test for DatabricksEmbeddings class.
+
+The integration tests run nightly on a trusted CI/CD system defined in a
+private repository, so that they can be executed securely. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksEmbeddings implementation and you think
+that you need to update the corresponding integration test, please contact
+the maintainers of the repository to verify the changes.
+"""
+
 from langchain_databricks import DatabricksEmbeddings
 
 _TEST_ENDPOINT = "databricks-bge-large-en"
 
+
 def test_embedding_documents() -> None:
     documents = ["foo bar"]
     embedding = DatabricksEmbeddings(endpoint=_TEST_ENDPOINT)
diff --git a/libs/databricks/tests/integration_tests/test_vectorstore.py b/libs/databricks/tests/integration_tests/test_vectorstore.py
new file mode 100644
index 0000000..49fffae
--- /dev/null
+++ b/libs/databricks/tests/integration_tests/test_vectorstore.py
@@ -0,0 +1,73 @@
+"""
+This file contains the integration test for DatabricksVectorSearch class.
+
+The integration tests run nightly on a trusted CI/CD system defined in a
+private repository, so that they can be executed securely. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksVectorSearch implementation and you think
+that you need to update the corresponding integration test, please contact
+the maintainers of the repository to verify the changes.
+"""
+
+import os
+import time
+
+import requests
+
+
+def test_vectorstore():
+    """
+    Trigger the vector store integration workflow and wait for its result.
+
+    The setup is too complex to run within a single python file, so the real
+    tests run as a Databricks Workflow and this test only drives the REST API.
+    Reads DATABRICKS_HOST, DATABRICKS_TOKEN and VS_TEST_JOB_ID from the env.
+    """
+    test_endpoint = os.getenv("DATABRICKS_HOST")
+    test_job_id = os.getenv("VS_TEST_JOB_ID")
+    assert test_endpoint and test_job_id, "DATABRICKS_HOST/VS_TEST_JOB_ID not set."
+    headers = {"Authorization": f"Bearer {os.getenv('DATABRICKS_TOKEN')}"}
+    api = f"{test_endpoint}/api/2.1/jobs"
+
+    # Check if there is any ongoing job run
+    response = requests.get(
+        f"{api}/runs/list",
+        json={"job_id": test_job_id, "active_only": True},
+        headers=headers,
+        timeout=60,
+    )
+    assert response.status_code == 200, "Failed to list the job runs."
+    # The "runs" key is absent or empty when nothing is active.
+    no_active_run = not response.json().get("runs")
+    assert no_active_run, "There is an ongoing job run. Please wait for it to complete."
+
+    # Trigger the workflow
+    response = requests.post(
+        f"{api}/run-now",
+        json={"job_id": test_job_id},
+        headers=headers,
+        timeout=60,
+    )
+    assert response.status_code == 200, "Failed to trigger the workflow."
+    # Capture the run id once; later responses overwrite `response`.
+    run_id = response.json()["run_id"]
+
+    # Wait for the job to complete
+    while True:
+        response = requests.get(
+            f"{api}/runs/get",
+            json={"run_id": run_id},
+            headers=headers,
+            timeout=60,
+        )
+        assert response.status_code == 200, "Failed to get the job status."
+
+        status = response.json()["status"]
+        if status["state"] == "TERMINATED":
+            if status["termination_details"]["type"] == "SUCCESS":
+                break
+            # Any other termination type means the workflow did not pass.
+            raise AssertionError("Job failed. Please check the logs in the workspace.")
+
+        print("Job is still running...")  # noqa: T201
+        time.sleep(60)