Skip to content

Commit

Permalink
Run integration tests in CI
Browse files Browse the repository at this point in the history
  • Loading branch information
maxjakob committed Apr 3, 2024
1 parent 8a30585 commit ec8a032
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 116 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: compile-integration-test
name: integration-test

on:
workflow_call:
Expand All @@ -13,18 +13,32 @@ env:

jobs:
build:
name: "make integration_tests"
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: "poetry run pytest -m compile tests/integration_tests #${{ matrix.python-version }}"
services:
elasticsearch:
image: elasticsearch:8.13.0
env:
discovery.type: single-node
xpack.license.self_generated.type: trial
xpack.security.enabled: false # disable password and TLS; never do this in production!
ports:
- 9200:9200
options: >-
--health-cmd "curl --fail http://localhost:9200/_cluster/health"
--health-start-period 10s
--health-timeout 3s
--health-interval 3s
--health-retries 10
steps:
- uses: actions/checkout@v4

Expand All @@ -34,24 +48,12 @@ jobs:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: compile-integration
cache-key: integration-tests

- name: Install integration dependencies
- name: Install dependencies
shell: bash
run: poetry install --with=test_integration,test

- name: Check integration tests compile
- name: Run integration tests
shell: bash
run: poetry run pytest -m compile tests/integration_tests

- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
run: make integration_test
7 changes: 4 additions & 3 deletions .github/workflows/check_diffs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,21 @@ jobs:
working-directory: ${{ matrix.working-directory }}
secrets: inherit

compile-integration-tests:
integration-test:
name: cd ${{ matrix.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
strategy:
matrix:
working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
uses: ./.github/workflows/_compile_integration_test.yml
uses: ./.github/workflows/_integration_test.yml
with:
working-directory: ${{ matrix.working-directory }}
secrets: inherit

ci_success:
name: "CI Success"
needs: [build, lint, test, compile-integration-tests]
needs: [build, lint, test, integration-test]
if: |
always()
runs-on: ubuntu-latest
Expand Down
10 changes: 9 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
return similarity


def check_if_model_deployed(client: Elasticsearch, model_id: str) -> None:
def model_must_be_deployed(client: Elasticsearch, model_id: str) -> None:
try:
dummy = {"x": "y"}
client.ml.infer_trained_model(model_id=model_id, docs=[dummy])
Expand All @@ -106,3 +106,11 @@ def check_if_model_deployed(client: Elasticsearch, model_id: str) -> None:
# This error is expected because we do not know the expected document
# shape and just use a dummy doc above.
pass


def model_is_deployed(es_client: Elasticsearch, model_id: str) -> bool:
    """Report whether ``model_id`` is deployed, as a boolean.

    Thin wrapper around ``model_must_be_deployed``: a ``NotFoundError`` raised
    by that check is translated into ``False``; any other exception propagates
    to the caller unchanged.

    Args:
        es_client: Elasticsearch client used to run the check.
        model_id: Identifier of the trained model to look for.

    Returns:
        ``True`` if the check completed without ``NotFoundError``, else
        ``False``.
    """
    try:
        model_must_be_deployed(es_client, model_id)
    except NotFoundError:
        return False
    return True
6 changes: 3 additions & 3 deletions libs/elasticsearch/langchain_elasticsearch/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@

from langchain_elasticsearch._utilities import (
DistanceStrategy,
check_if_model_deployed,
maximal_marginal_relevance,
model_must_be_deployed,
with_user_agent_header,
)

Expand Down Expand Up @@ -204,7 +204,7 @@ def before_index_setup(
self, client: "Elasticsearch", text_field: str, vector_query_field: str
) -> None:
if self.query_model_id:
check_if_model_deployed(client, self.query_model_id)
model_must_be_deployed(client, self.query_model_id)

def index(
self,
Expand Down Expand Up @@ -348,7 +348,7 @@ def before_index_setup(
self, client: "Elasticsearch", text_field: str, vector_query_field: str
) -> None:
if self.model_id:
check_if_model_deployed(client, self.model_id)
model_must_be_deployed(client, self.model_id)

# Create a pipeline for the model
client.ingest.put_pipeline(
Expand Down
43 changes: 25 additions & 18 deletions libs/elasticsearch/tests/integration_tests/_test_utilities.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,33 @@
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from elastic_transport import Transport
from elasticsearch import Elasticsearch


def read_es_params() -> Dict:
    """Collect Elasticsearch connection settings from the environment.

    Returns:
        ``{"es_cloud_id": ..., "es_api_key": ...}`` when ``ES_CLOUD_ID`` is set
        to a non-empty value (``es_api_key`` is ``None`` if ``ES_API_KEY`` is
        unset); otherwise ``{"es_url": ...}`` using ``ES_URL`` or the default
        ``http://localhost:9200``.
    """
    env = os.environ
    cloud_id = env.get("ES_CLOUD_ID")

    # No cloud deployment configured: fall back to a plain URL connection.
    if not cloud_id:
        return {"es_url": env.get("ES_URL", "http://localhost:9200")}

    return {"es_cloud_id": cloud_id, "es_api_key": env.get("ES_API_KEY")}


def create_es_client(
    es_params: Optional[Dict[str, str]] = None, **kwargs: Any
) -> Elasticsearch:
    """Build an Elasticsearch client for the integration tests.

    Args:
        es_params: Connection settings containing either ``es_cloud_id`` and
            ``es_api_key`` or ``es_url``. When ``None``, the settings are
            obtained from ``read_es_params()``.
        **kwargs: Extra keyword arguments forwarded to the ``Elasticsearch``
            constructor (for example ``transport_class``).

    Returns:
        A client connected via cloud ID + API key when ``es_cloud_id`` is
        present in ``es_params``, otherwise via the ``es_url`` host.
    """
    if es_params is None:
        es_params = read_es_params()

    if "es_cloud_id" in es_params:
        # Forward kwargs here as well; otherwise options such as a custom
        # transport class would be silently dropped for cloud connections.
        return Elasticsearch(
            cloud_id=es_params["es_cloud_id"],
            api_key=es_params["es_api_key"],
            **kwargs,
        )
    return Elasticsearch(hosts=[es_params["es_url"]], **kwargs)


def clear_test_indices(es: Elasticsearch) -> None:
index_names = es.indices.get(index="_all").keys()
for index_name in index_names:
Expand All @@ -23,20 +46,4 @@ def perform_request(self, *args, **kwargs): # type: ignore
self.requests.append(kwargs)
return super().perform_request(*args, **kwargs)

es_url = os.environ.get("ES_URL", "http://localhost:9200")
cloud_id = os.environ.get("ES_CLOUD_ID")
api_key = os.environ.get("ES_API_KEY")

if cloud_id:
# Running this integration test with Elastic Cloud
# Required for in-stack inference testing (ELSER + model_id)
es = Elasticsearch(
cloud_id=cloud_id,
api_key=api_key,
transport_class=CustomTransport,
)
else:
# Running this integration test with local docker instance
es = Elasticsearch(hosts=[es_url], transport_class=CustomTransport)

return es
return create_es_client(transport_class=CustomTransport)
7 changes: 3 additions & 4 deletions libs/elasticsearch/tests/integration_tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ version: "3"

services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.12.1 # https://www.docker.elastic.co/r/elasticsearch/elasticsearch
image: elasticsearch:8.13.0
environment:
- discovery.type=single-node
- xpack.security.enabled=false # security has been disabled, so no login or password is required.
- xpack.security.http.ssl.enabled=false
- xpack.license.self_generated.type=trial
- xpack.security.enabled=false # disable password and TLS; never do this in production!
ports:
- "9200:9200"
healthcheck:
Expand All @@ -20,7 +19,7 @@ services:
retries: 60

kibana:
image: docker.elastic.co/kibana/kibana:8.12.1
image: kibana:8.13.0
environment:
- ELASTICSEARCH_URL=http://elasticsearch:9200
ports:
Expand Down
52 changes: 24 additions & 28 deletions libs/elasticsearch/tests/integration_tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,44 @@
"""Test elasticsearch_embeddings embeddings."""

import os

import pytest
from langchain_core.utils import get_from_env
from elasticsearch import Elasticsearch

from langchain_elasticsearch._utilities import model_is_deployed
from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings

# deployed with
# https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-text-emb-vector-search-example.html
DEFAULT_MODEL = "sentence-transformers__msmarco-minilm-l-12-v3"
DEFAULT_NUM_DIMENSIONS = "384"


@pytest.fixture
def model_id() -> str:
return get_from_env("model_id", "MODEL_ID", DEFAULT_MODEL)

# Model under test; override with the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "sentence-transformers__msmarco-minilm-l-12-v3")
# Expected embedding length. The correctly spelled NUM_DIMENSIONS variable takes
# precedence; the previously used misspelling NUM_DIMENTIONS is still honored so
# existing setups keep working. Defaults to 384.
NUM_DIMENSIONS = int(
    os.getenv("NUM_DIMENSIONS", os.getenv("NUM_DIMENTIONS", "384"))
)

@pytest.fixture
def expected_num_dimensions() -> int:
return int(
get_from_env(
"expected_num_dimensions", "EXPECTED_NUM_DIMENSIONS", DEFAULT_NUM_DIMENSIONS
)
)
ES_URL = os.environ.get("ES_URL", "http://localhost:9200")
ES_CLIENT = Elasticsearch(hosts=[ES_URL])


def test_elasticsearch_embedding_documents(
model_id: str, expected_num_dimensions: int
) -> None:
@pytest.mark.skipif(
not model_is_deployed(ES_CLIENT, MODEL_ID),
reason=f"{MODEL_ID} model is not deployed in ML Node, skipping test",
)
def test_elasticsearch_embedding_documents() -> None:
"""Test Elasticsearch embedding documents."""
documents = ["foo bar", "bar foo", "foo"]
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
embedding = ElasticsearchEmbeddings.from_credentials(MODEL_ID)
output = embedding.embed_documents(documents)
assert len(output) == 3
assert len(output[0]) == expected_num_dimensions
assert len(output[1]) == expected_num_dimensions
assert len(output[2]) == expected_num_dimensions
assert len(output[0]) == NUM_DIMENSIONS
assert len(output[1]) == NUM_DIMENSIONS
assert len(output[2]) == NUM_DIMENSIONS


def test_elasticsearch_embedding_query(
model_id: str, expected_num_dimensions: int
) -> None:
@pytest.mark.skipif(
not model_is_deployed(ES_CLIENT, MODEL_ID),
reason=f"{MODEL_ID} model is not deployed in ML Node, skipping test",
)
def test_elasticsearch_embedding_query() -> None:
"""Test Elasticsearch embedding query."""
document = "foo bar"
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
embedding = ElasticsearchEmbeddings.from_credentials(MODEL_ID)
output = embedding.embed_query(document)
assert len(output) == expected_num_dimensions
assert len(output) == NUM_DIMENSIONS
Loading

0 comments on commit ec8a032

Please sign in to comment.