From df65759d39096250ae8dd198ad647a1419995fcc Mon Sep 17 00:00:00 2001
From: B-Step62
Date: Mon, 23 Sep 2024 21:50:43 +0900
Subject: [PATCH] Add integration test for vectorstore

Signed-off-by: B-Step62
---
 .../integration_tests/test_chat_models.py     | 11 +++
 .../integration_tests/test_embeddings.py      | 12 +++
 .../integration_tests/test_vectorstore.py     | 88 +++++++++++++++++++
 3 files changed, 111 insertions(+)
 create mode 100644 libs/databricks/tests/integration_tests/test_vectorstore.py

diff --git a/libs/databricks/tests/integration_tests/test_chat_models.py b/libs/databricks/tests/integration_tests/test_chat_models.py
index d517267..6c995de 100644
--- a/libs/databricks/tests/integration_tests/test_chat_models.py
+++ b/libs/databricks/tests/integration_tests/test_chat_models.py
@@ -1,3 +1,14 @@
+"""
+This file contains the integration test for the ChatDatabricks class.
+
+We run the integration tests nightly by the trusted CI/CD system defined in
+a private repository, in order to securely run the tests. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the ChatDatabricks implementation and you think that you
+need to update the corresponding integration test, please contact the
+maintainers of the repository to verify the changes.
+"""
+
 from typing import Annotated
 from unittest import mock
 
diff --git a/libs/databricks/tests/integration_tests/test_embeddings.py b/libs/databricks/tests/integration_tests/test_embeddings.py
index d0c6b10..d871cd5 100644
--- a/libs/databricks/tests/integration_tests/test_embeddings.py
+++ b/libs/databricks/tests/integration_tests/test_embeddings.py
@@ -1,7 +1,19 @@
+"""
+This file contains the integration test for the DatabricksEmbeddings class.
+
+We run the integration tests nightly by the trusted CI/CD system defined in
+a private repository, in order to securely run the tests. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksEmbeddings implementation and you think
+that you need to update the corresponding integration test, please contact
+the maintainers of the repository to verify the changes.
+"""
+
 from langchain_databricks import DatabricksEmbeddings
 
 _TEST_ENDPOINT = "databricks-bge-large-en"
 
+
 def test_embedding_documents() -> None:
     documents = ["foo bar"]
     embedding = DatabricksEmbeddings(endpoint=_TEST_ENDPOINT)
diff --git a/libs/databricks/tests/integration_tests/test_vectorstore.py b/libs/databricks/tests/integration_tests/test_vectorstore.py
new file mode 100644
index 0000000..49fffae
--- /dev/null
+++ b/libs/databricks/tests/integration_tests/test_vectorstore.py
@@ -0,0 +1,88 @@
+"""
+This file contains the integration test for the DatabricksVectorSearch class.
+
+We run the integration tests nightly by the trusted CI/CD system defined in
+a private repository, in order to securely run the tests. With this design,
+the integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksVectorSearch implementation and you think
+that you need to update the corresponding integration test, please contact
+the maintainers of the repository to verify the changes.
+"""
+
+import os
+import time
+
+import requests
+
+# Per-request timeout in seconds, so a stalled connection cannot hang the test.
+_REQUEST_TIMEOUT_SECONDS = 60
+
+
+def test_vectorstore() -> None:
+    """
+    We run the integration tests for vector store by Databricks Workflow,
+    because the setup is too complex to run within a single python file.
+    Therefore, this test simply triggers the workflow by calling the REST API
+    and polls the run until it terminates.
+    """
+    test_endpoint = os.getenv("DATABRICKS_HOST")
+    test_job_id = os.getenv("VS_TEST_JOB_ID")
+    headers = {
+        "Authorization": f"Bearer {os.getenv('DATABRICKS_TOKEN')}",
+    }
+
+    # Check if there is any ongoing job run
+    response = requests.get(
+        f"{test_endpoint}/api/2.1/jobs/runs/list",
+        json={
+            "job_id": test_job_id,
+            "active_only": True,
+        },
+        headers=headers,
+        timeout=_REQUEST_TIMEOUT_SECONDS,
+    )
+    # Without this check an auth/permission error (whose body has no "runs"
+    # key) would be mistaken for "no active run".
+    assert response.status_code == 200, "Failed to list the job runs."
+    assert not response.json().get("runs"), (
+        "There is an ongoing job run. Please wait for it to complete."
+    )
+
+    # Trigger the workflow
+    response = requests.post(
+        f"{test_endpoint}/api/2.1/jobs/run-now",
+        json={
+            "job_id": test_job_id,
+        },
+        headers=headers,
+        timeout=_REQUEST_TIMEOUT_SECONDS,
+    )
+
+    assert response.status_code == 200, "Failed to trigger the workflow."
+    # Capture the run ID once; `response` is rebound on every poll below.
+    run_id = response.json()["run_id"]
+
+    # Wait for the job to complete
+    while True:
+        response = requests.get(
+            f"{test_endpoint}/api/2.1/jobs/runs/get",
+            json={
+                "run_id": run_id,
+            },
+            headers=headers,
+            timeout=_REQUEST_TIMEOUT_SECONDS,
+        )
+
+        assert response.status_code == 200, "Failed to get the job status."
+
+        status = response.json()["status"]
+        if status["state"] == "TERMINATED":
+            if status["termination_details"]["type"] == "SUCCESS":
+                break
+            # Explicit raise: `assert False` is stripped under `python -O`.
+            raise AssertionError(
+                "Job failed. Please check the logs in the workspace."
+            )
+
+        time.sleep(60)
+        print("Job is still running...")  # noqa: T201