Skip to content

Commit

Permalink
Integration tests in CI (#5)
Browse files Browse the repository at this point in the history
* Run integration tests in CI

* Clean up compile tests

* Utilize test utilities functions in chat history test
  • Loading branch information
maxjakob authored Apr 4, 2024
1 parent 10a96cb commit 89fafbe
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 154 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: compile-integration-test
name: integration-test

on:
workflow_call:
Expand All @@ -13,18 +13,32 @@ env:

jobs:
build:
name: "make integration_tests"
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: "poetry run pytest -m compile tests/integration_tests #${{ matrix.python-version }}"
services:
elasticsearch:
image: elasticsearch:8.13.0
env:
discovery.type: single-node
xpack.license.self_generated.type: trial
xpack.security.enabled: false # disable password and TLS; never do this in production!
ports:
- 9200:9200
options: >-
--health-cmd "curl --fail http://localhost:9200/_cluster/health"
--health-start-period 10s
--health-timeout 3s
--health-interval 3s
--health-retries 10
steps:
- uses: actions/checkout@v4

Expand All @@ -34,24 +48,12 @@ jobs:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: compile-integration
cache-key: integration-tests

- name: Install integration dependencies
- name: Install dependencies
shell: bash
run: poetry install --with=test_integration,test

- name: Check integration tests compile
- name: Run integration tests
shell: bash
run: poetry run pytest -m compile tests/integration_tests

- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
run: make integration_test
7 changes: 4 additions & 3 deletions .github/workflows/check_diffs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,21 @@ jobs:
working-directory: ${{ matrix.working-directory }}
secrets: inherit

compile-integration-tests:
integration-test:
name: cd ${{ matrix.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
strategy:
matrix:
working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
uses: ./.github/workflows/_compile_integration_test.yml
uses: ./.github/workflows/_integration_test.yml
with:
working-directory: ${{ matrix.working-directory }}
secrets: inherit

ci_success:
name: "CI Success"
needs: [build, lint, test, compile-integration-tests]
needs: [build, lint, test, integration-test]
if: |
always()
runs-on: ubuntu-latest
Expand Down
10 changes: 9 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
return similarity


def check_if_model_deployed(client: Elasticsearch, model_id: str) -> None:
def model_must_be_deployed(client: Elasticsearch, model_id: str) -> None:
try:
dummy = {"x": "y"}
client.ml.infer_trained_model(model_id=model_id, docs=[dummy])
Expand All @@ -106,3 +106,11 @@ def check_if_model_deployed(client: Elasticsearch, model_id: str) -> None:
# This error is expected because we do not know the expected document
# shape and just use a dummy doc above.
pass


def model_is_deployed(es_client: Elasticsearch, model_id: str) -> bool:
    """Return True when the ML model `model_id` is deployed, False otherwise.

    Thin boolean wrapper around `model_must_be_deployed`: a NotFoundError
    raised there is translated into False instead of propagating.
    """
    try:
        model_must_be_deployed(es_client, model_id)
    except NotFoundError:
        return False
    return True
6 changes: 3 additions & 3 deletions libs/elasticsearch/langchain_elasticsearch/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@

from langchain_elasticsearch._utilities import (
DistanceStrategy,
check_if_model_deployed,
maximal_marginal_relevance,
model_must_be_deployed,
with_user_agent_header,
)

Expand Down Expand Up @@ -206,7 +206,7 @@ def before_index_setup(
self, client: "Elasticsearch", text_field: str, vector_query_field: str
) -> None:
if self.query_model_id:
check_if_model_deployed(client, self.query_model_id)
model_must_be_deployed(client, self.query_model_id)

def index(
self,
Expand Down Expand Up @@ -352,7 +352,7 @@ def before_index_setup(
self, client: "Elasticsearch", text_field: str, vector_query_field: str
) -> None:
if self.model_id:
check_if_model_deployed(client, self.model_id)
model_must_be_deployed(client, self.model_id)

# Create a pipeline for the model
client.ingest.put_pipeline(
Expand Down
1 change: 0 additions & 1 deletion libs/elasticsearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,5 @@ addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5
markers = [
"requires: mark tests as requiring a specific library",
"asyncio: mark tests as requiring asyncio",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
47 changes: 29 additions & 18 deletions libs/elasticsearch/tests/integration_tests/_test_utilities.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,37 @@
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from elastic_transport import Transport
from elasticsearch import Elasticsearch


def read_env() -> Dict:
    """Collect Elasticsearch connection settings from environment variables.

    Prefers an Elastic Cloud configuration when ES_CLOUD_ID is set,
    returning {"es_cloud_id": ..., "es_api_key": ...}; otherwise returns
    {"es_url": ...} using ES_URL (default: http://localhost:9200).
    """
    env = os.environ
    if env.get("ES_CLOUD_ID"):
        return {
            "es_cloud_id": env.get("ES_CLOUD_ID"),
            "es_api_key": env.get("ES_API_KEY"),
        }
    return {"es_url": env.get("ES_URL", "http://localhost:9200")}


def create_es_client(
    es_params: Optional[Dict[str, str]] = None, es_kwargs: Optional[Dict] = None
) -> Elasticsearch:
    """Build an Elasticsearch client from connection parameters.

    Args:
        es_params: Connection settings as produced by `read_env()`; when
            None, the environment is read via `read_env()`. A dict with
            "es_cloud_id" (and "es_api_key") selects Elastic Cloud,
            otherwise "es_url" is used as a plain host URL.
        es_kwargs: Extra keyword arguments forwarded to the Elasticsearch
            constructor (e.g. transport_class). Defaults to no extras.

    Returns:
        A configured Elasticsearch client.
    """
    if es_params is None:
        es_params = read_env()
    # NOTE: use a None sentinel instead of a `{}` default argument — a
    # mutable default dict is shared across calls and callers could
    # observe/accumulate each other's mutations.
    if es_kwargs is None:
        es_kwargs = {}

    if "es_cloud_id" in es_params:
        return Elasticsearch(
            cloud_id=es_params["es_cloud_id"],
            api_key=es_params["es_api_key"],
            **es_kwargs,
        )
    return Elasticsearch(hosts=[es_params["es_url"]], **es_kwargs)


def clear_test_indices(es: Elasticsearch) -> None:
index_names = es.indices.get(index="_all").keys()
for index_name in index_names:
Expand All @@ -23,20 +50,4 @@ def perform_request(self, *args, **kwargs): # type: ignore
self.requests.append(kwargs)
return super().perform_request(*args, **kwargs)

es_url = os.environ.get("ES_URL", "http://localhost:9200")
cloud_id = os.environ.get("ES_CLOUD_ID")
api_key = os.environ.get("ES_API_KEY")

if cloud_id:
# Running this integration test with Elastic Cloud
# Required for in-stack inference testing (ELSER + model_id)
es = Elasticsearch(
cloud_id=cloud_id,
api_key=api_key,
transport_class=CustomTransport,
)
else:
# Running this integration test with local docker instance
es = Elasticsearch(hosts=[es_url], transport_class=CustomTransport)

return es
return create_es_client(es_kwargs=dict(transport_class=CustomTransport))
7 changes: 3 additions & 4 deletions libs/elasticsearch/tests/integration_tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ version: "3"

services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.12.1 # https://www.docker.elastic.co/r/elasticsearch/elasticsearch
image: elasticsearch:8.13.0
environment:
- discovery.type=single-node
- xpack.security.enabled=false # security has been disabled, so no login or password is required.
- xpack.security.http.ssl.enabled=false
- xpack.license.self_generated.type=trial
- xpack.security.enabled=false # disable password and TLS; never do this in production!
ports:
- "9200:9200"
healthcheck:
Expand All @@ -20,7 +19,7 @@ services:
retries: 60

kibana:
image: docker.elastic.co/kibana/kibana:8.12.1
image: kibana:8.13.0
environment:
- ELASTICSEARCH_URL=http://elasticsearch:9200
ports:
Expand Down
45 changes: 15 additions & 30 deletions libs/elasticsearch/tests/integration_tests/test_chat_history.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json
import os
import uuid
from typing import Generator, Union

Expand All @@ -9,6 +8,12 @@

from langchain_elasticsearch.chat_history import ElasticsearchChatMessageHistory

from ._test_utilities import (
clear_test_indices,
create_es_client,
read_env,
)

"""
cd tests/integration_tests
docker-compose up elasticsearch
Expand All @@ -24,35 +29,15 @@
class TestElasticsearch:
@pytest.fixture(scope="class", autouse=True)
def elasticsearch_connection(self) -> Union[dict, Generator[dict, None, None]]:
# Run this integration test against Elasticsearch on localhost,
# or an Elastic Cloud instance
from elasticsearch import Elasticsearch

es_url = os.environ.get("ES_URL", "http://localhost:9200")
es_cloud_id = os.environ.get("ES_CLOUD_ID")
es_api_key = os.environ.get("ES_API_KEY")

if es_cloud_id:
es = Elasticsearch(
cloud_id=es_cloud_id,
api_key=es_api_key,
)
yield {
"es_cloud_id": es_cloud_id,
"es_api_key": es_api_key,
}

else:
# Running this integration test with local docker instance
es = Elasticsearch(hosts=es_url)
yield {"es_url": es_url}

# Clear all indexes
index_names = es.indices.get(index="_all").keys()
for index_name in index_names:
if index_name.startswith("test_"):
es.indices.delete(index=index_name)
es.indices.refresh(index="_all")
params = read_env()
es = create_es_client(params)

yield params

# clear indices
clear_test_indices(es)

return None

@pytest.fixture(scope="function")
def index_name(self) -> str:
Expand Down
7 changes: 0 additions & 7 deletions libs/elasticsearch/tests/integration_tests/test_compile.py

This file was deleted.

52 changes: 24 additions & 28 deletions libs/elasticsearch/tests/integration_tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,44 @@
"""Test elasticsearch_embeddings embeddings."""

import os

import pytest
from langchain_core.utils import get_from_env
from elasticsearch import Elasticsearch

from langchain_elasticsearch._utilities import model_is_deployed
from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings

# deployed with
# https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-text-emb-vector-search-example.html
DEFAULT_MODEL = "sentence-transformers__msmarco-minilm-l-12-v3"
DEFAULT_NUM_DIMENSIONS = "384"


@pytest.fixture
def model_id() -> str:
return get_from_env("model_id", "MODEL_ID", DEFAULT_MODEL)

MODEL_ID = os.getenv("MODEL_ID", "sentence-transformers__msmarco-minilm-l-12-v3")
NUM_DIMENSIONS = int(os.getenv("NUM_DIMENTIONS", "384"))

@pytest.fixture
def expected_num_dimensions() -> int:
return int(
get_from_env(
"expected_num_dimensions", "EXPECTED_NUM_DIMENSIONS", DEFAULT_NUM_DIMENSIONS
)
)
ES_URL = os.environ.get("ES_URL", "http://localhost:9200")
ES_CLIENT = Elasticsearch(hosts=[ES_URL])


def test_elasticsearch_embedding_documents(
model_id: str, expected_num_dimensions: int
) -> None:
@pytest.mark.skipif(
not model_is_deployed(ES_CLIENT, MODEL_ID),
reason=f"{MODEL_ID} model is not deployed in ML Node, skipping test",
)
def test_elasticsearch_embedding_documents() -> None:
"""Test Elasticsearch embedding documents."""
documents = ["foo bar", "bar foo", "foo"]
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
embedding = ElasticsearchEmbeddings.from_credentials(MODEL_ID)
output = embedding.embed_documents(documents)
assert len(output) == 3
assert len(output[0]) == expected_num_dimensions
assert len(output[1]) == expected_num_dimensions
assert len(output[2]) == expected_num_dimensions
assert len(output[0]) == NUM_DIMENSIONS
assert len(output[1]) == NUM_DIMENSIONS
assert len(output[2]) == NUM_DIMENSIONS


def test_elasticsearch_embedding_query(
model_id: str, expected_num_dimensions: int
) -> None:
@pytest.mark.skipif(
not model_is_deployed(ES_CLIENT, MODEL_ID),
reason=f"{MODEL_ID} model is not deployed in ML Node, skipping test",
)
def test_elasticsearch_embedding_query() -> None:
"""Test Elasticsearch embedding query."""
document = "foo bar"
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
embedding = ElasticsearchEmbeddings.from_credentials(MODEL_ID)
output = embedding.embed_query(document)
assert len(output) == expected_num_dimensions
assert len(output) == NUM_DIMENSIONS
Loading

0 comments on commit 89fafbe

Please sign in to comment.