559 add inference job tests to makefile (#633)

* Added inference test to Makefile, cleaned deps * More deps * Fixed eval + inference tests * Addressed review comments - refactored target names and removed test-*-target wildcard to avoid recursive calls - added more comments to better explain what happens in tests - moved jobs tests away from backend Now test targets are: - test-all - test-sdk - test-sdk-unit - test-sdk-integration-container - test-backend - test-backend-unit - test-backend-integration-container - test-jobs - test-jobs-evaluation-unit - test-jobs-inference-unit * Fix CI tests * Added jobs to watched paths so we can update them live in the container
mozilla-ai · Jan 16, 2025 · 550fbe5 · 550fbe5
1 parent f2669bf
commit 550fbe5
Show file tree

Hide file tree

Showing 8 changed files with 80 additions and 427 deletions.
diff --git a/.devcontainer/docker-compose.override.yaml b/.devcontainer/docker-compose.override.yaml
@@ -18,6 +18,11 @@ services:
           action: sync
           ignore:
             - .venv/
+        - path: lumigator/python/mzai/jobs/
+          target: /mzai/lumigator/python/mzai/jobs
+          action: sync
+          ignore:
+            - .venv/
         - path: lumigator/python/mzai/schemas/
           target: /mzai/lumigator/python/mzai/schemas
           action: sync

diff --git a/.github/workflows/lumigator_pipeline.yaml b/.github/workflows/lumigator_pipeline.yaml
@@ -42,7 +42,7 @@ jobs:
     if: ${{ needs.lint.result == 'success' }}
     strategy:
       matrix:
-        test: ["sdk", "backend" ]
+        test: ["sdk", "backend", "jobs"]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -78,10 +78,7 @@ jobs:
         run: make start-lumigator-build
 
       - name: Run ${{ matrix.test }} integration tests
-        run: make test-${{ matrix.test }}-integration-target
-
-      - name: Run Backend integration tests
-        run: make test-backend-integration-target
+        run: make test-${{ matrix.test }}-integration
 
       - name: Collect Ray logs in case of failure
         if: failure()

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: local-up local-down local-logs clean-docker-buildcache clean-docker-images clean-docker-containers start-lumigator-external-services start-lumigator stop-lumigator test-sdk-unit test-sdk-integration test-sdk-integration-target test-sdk-target test-backend-unit test-backend-integration test-backend-integration-target test-backend-target test-all-target test-%
+.PHONY: local-up local-down local-logs clean-docker-buildcache clean-docker-images clean-docker-containers start-lumigator-external-services start-lumigator stop-lumigator test-sdk-unit test-sdk-integration test-sdk-integration-containers test-sdk test-backend-unit test-backend-integration test-backend-integration-containers test-backend test-jobs-evaluation-unit test-jobs-inference-unit test-jobs test-all
 
 SHELL:=/bin/bash
 UNAME:= $(shell uname -o)
@@ -9,21 +9,28 @@ KEEP_CONTAINERS_UP := $(shell grep -E '^KEEP_CONTAINERS_UP=' .env | cut -d'=' -f
 
 KEEP_CONTAINERS_UP ?= "FALSE"
 
-#used in docker-compose to choose the right Ray image
+# used in docker-compose to choose the right Ray image
 ARCH := $(shell uname -m)
 RAY_ARCH_SUFFIX :=
 
 ifeq ($(ARCH), arm64)
 	RAY_ARCH_SUFFIX := -aarch64
 endif
 
+# lumigator runs on a set of containers (backend, ray, minio, etc).
+# The following allows one to start all of them before calling a target
+# (typically a test), run the target, then pull everything down afterwards.
+# When testing, one can set the KEEP_CONTAINERS_UP env var to "TRUE"
+# so they can check logs from the running containers.
 define run_with_containers
 	@echo "No Lumigator containers are running. Starting containers..."
 	make start-lumigator-build
 	@if [ $(KEEP_CONTAINERS_UP) = "FALSE" ]; then echo "The script will remove containers after tests"; trap "cd $(PROJECT_ROOT); make stop-lumigator" EXIT; fi; \
 	make $(1)
 endef
 
+# `run_with_existing_containers` allows one to run a target (typically
+# a test) on already running containers.
 define run_with_existing_containers
 	@echo "Lumigator containers are already running."
 	@if [ -n "$(AUTO_TEST_RUN)" ]; then \
@@ -106,50 +113,68 @@ clean-docker-all: clean-docker-containers clean-docker-buildcache clean-docker-i
 
 clean-all: clean-docker-buildcache clean-docker-containers
 
+
+# SDK tests
+# We have both unit and integration tests for the SDK.
+# Integration tests require all containers to be up, so as a safety measure
+# `test-sdk-integration-containers` is usually called and this will either
+# start them if they are not present or use the currently running ones.
 test-sdk-unit:
 	cd lumigator/python/mzai/sdk/tests; \
 	uv run pytest -o python_files="unit/*/test_*.py unit/test_*.py"
 
-test-sdk-integration-target:
+test-sdk-integration:
 	cd lumigator/python/mzai/sdk/tests; \
 	uv run pytest -o python_files="integration/test_*.py integration/*/test_*.py"
 
-test-sdk-integration:
+test-sdk-integration-containers:
 ifeq ($(CONTAINERS_RUNNING),)
-	$(call run_with_containers, test-sdk-integration-target)
+	$(call run_with_containers, test-sdk-integration)
 else
-	$(call run_with_existing_containers, test-sdk-integration-target)
+	$(call run_with_existing_containers, test-sdk-integration)
 endif
 
+test-sdk: test-sdk-unit test-sdk-integration-containers
+
+
+# backend tests
+# We have both unit and integration tests for the backend.
+# Integration tests require all containers to be up, so as a safety measure
+# `test-sdk-integration-containers` is usually called and this will either
+# start them if they are not present or use the currently running ones.
 test-backend-unit:
 	cd lumigator/python/mzai/backend/; \
 	SQLALCHEMY_DATABASE_URL=sqlite:////tmp/local.db uv run pytest -o python_files="backend/tests/unit/*/test_*.py"
 
-test-backend-integration-target:
+test-backend-integration:
 	cd lumigator/python/mzai/backend/; \
 	docker container list --all; \
 	SQLALCHEMY_DATABASE_URL=sqlite:////tmp/local.db RAY_WORKER_GPUS="0.0" RAY_WORKER_GPUS_FRACTION="0.0" INFERENCE_PIP_REQS=../jobs/inference/requirements_cpu.txt INFERENCE_WORK_DIR=../jobs/inference EVALUATOR_PIP_REQS=../jobs/evaluator/requirements.txt EVALUATOR_WORK_DIR=../jobs/evaluator uv run pytest -s -o python_files="backend/tests/integration/*/test_*.py"
 
-test-backend-integration:
+test-backend-integration-containers:
 ifeq ($(CONTAINERS_RUNNING),)
-	$(call run_with_containers, test-backend-integration-target)
+	$(call run_with_containers, test-backend-integration)
 else
-	$(call run_with_existing_containers, test-backend-integration-target)
+	$(call run_with_existing_containers, test-backend-integration)
 endif
 
-test-jobs-unit:
+test-backend: test-backend-unit test-backend-integration-containers
+
+
+# jobs tests
+# We only have unit tests for jobs. They do not require any container to
+# be running, but they will set up a different, volatile python environment
+# with all the deps specified in their respective `requirements.txt` files.
+test-jobs-evaluation-unit:
 	cd lumigator/python/mzai/jobs/evaluator_lite; \
-	uv run pytest -o python_files="evaluator_lite/tests/test_*.py"
+	uv run --with pytest --with-requirements requirements.txt --isolated pytest
 
-test-sdk-target: test-sdk-unit test-sdk-integration
+test-jobs-inference-unit:
+	cd lumigator/python/mzai/jobs/inference; \
+	uv run --with pytest --with-requirements requirements.txt --isolated pytest
 
-test-backend-target: test-backend-unit test-backend-integration test-jobs-unit
+test-jobs-unit: test-jobs-evaluation-unit test-jobs-inference-unit
 
-test-all-target: test-sdk-target test-backend-target test-jobs-unit
 
-# Check whether there are already running containers before starting tests:
-#   If so, ask user if they want to proceed with the current deployment.
-#   If not: (1) start containers, (2) run tests, (3) tear down containers
-#   (regardless of tests outcome).
-test-%:
-	$(MAKE) test-$*-target
+# test everything
+test-all: test-sdk test-backend test-jobs-unit
diff --git a/lumigator/python/mzai/backend/uv.lock b/lumigator/python/mzai/backend/uv.lock
diff --git a/lumigator/python/mzai/jobs/evaluator_lite/tests/test_configs.py b/lumigator/python/mzai/jobs/evaluator_lite/tests/test_configs.py
@@ -11,7 +11,7 @@ def test_valid_config():
         name="my_eval_job",
         dataset=DatasetConfig(path="/path/to/dataset"),
         model=ModelConfig(path="/path/to/model"),
-        evaluation=EvaluationConfig(),
+        evaluation=EvaluationConfig(storage_path="/path/to/results"),
     )
     assert config
 
@@ -33,6 +33,6 @@ def test_valid_config_with_custom_metrics():
         name="my_eval_job",
         dataset=DatasetConfig(path="/path/to/dataset"),
         model=ModelConfig(path="/path/to/model"),
-        evaluation=EvaluationConfig(metrics=["bleu", "lumi"]),
+        evaluation=EvaluationConfig(metrics=["bleu", "lumi"], storage_path="/path/to/results"),
     )
     assert config
diff --git a/lumigator/python/mzai/jobs/inference/tests/data/config_full_hf.json b/lumigator/python/mzai/jobs/inference/tests/data/config_full_hf.json
@@ -1,7 +1,7 @@
 {
     "name": "full_inference_config",
     "dataset": { "path": "s3://lumigator-storage/datasets/deaddead-dead-dead-dead-deaddeaddead/dataset_name.csv" },
-    "pipeline": { "model_uri": "hf://facebook/bart-large-cnn", "task": "summarization" },
+    "hf_pipeline": { "model_uri": "hf://facebook/bart-large-cnn", "task": "summarization" },
     "job": {
         "max_samples": 10,
         "storage_path": "s3://lumigator-storage/jobs/results/",

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,11 +4,7 @@ version = "0.1.0"
 description = "Common deps to develop"
 requires-python = ">=3.11"
 dependencies = [
-    "huggingface-hub>=0.27.0",
-    "pandas>=2.2.3",
     "pre-commit>=4.0.1",
-    "pytest>=8.3.4",
-    "wandb>=0.19.1",
 ]
 
 [project.optional-dependencies]