Skip to content

Commit

Permalink
Merge pull request #43 from mobiusml/integration_tests
Browse files Browse the repository at this point in the history
Integration Tests
  • Loading branch information
movchan74 authored Feb 8, 2024
2 parents c0a10b9 + 3c28ec8 commit f852f42
Show file tree
Hide file tree
Showing 117 changed files with 12,875 additions and 1,071 deletions.
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CUDA_VISIBLE_DEVICES=""
USE_DEPLOYMENT_CACHE = True
SAVE_DEPLOYMENT_CACHE = True
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ jobs:
sudo apt-get update
sudo apt-get install ffmpeg
- name: Test with pytest
run: poetry run pytest
run: poetry run pytest -vv
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
env/
venv/
Expand Down
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,52 @@ that Ruff problems appear while you edit, and formatting is applied
automatically on save.


## Testing

The project uses pytest for testing. To run the tests, use the following command:

```bash
poetry run pytest
```

If you are using VS Code, you can run the tests using the Test Explorer that is installed with the [Python extension](https://code.visualstudio.com/docs/python/testing).

There are a few environment variables that can be set to control the behavior of the tests:
- `USE_DEPLOYMENT_CACHE`: If set to `true`, the tests will use the deployment cache to avoid downloading the models and running the deployments. This is useful for running integration tests faster and in the environment where GPU is not available.
- `SAVE_DEPLOYMENT_CACHE`: If set to `true`, the tests will save the deployment cache after running the deployments. This is useful for updating the deployment cache if new deployments or tests are added.

### How to use the deployment cache environment variables

Here are some examples of how to use the deployment cache environment variables.

#### Do you want to run the tests normally using GPU?

```bash
USE_DEPLOYMENT_CACHE=false
SAVE_DEPLOYMENT_CACHE=false
```

This is the default behavior. The tests will run normally using GPU and the deployment cache will be completely ignored.

#### Do you want to run the tests faster without GPU?

```bash
USE_DEPLOYMENT_CACHE=true
SAVE_DEPLOYMENT_CACHE=false
```

This will run the tests using the deployment cache to avoid downloading the models and running the deployments. The deployment cache will not be updated after running the deployments. Only use it if you are sure that the deployment cache is up to date.

#### Do you want to update the deployment cache?

```bash
USE_DEPLOYMENT_CACHE=false
SAVE_DEPLOYMENT_CACHE=true
```

This will run the tests normally using GPU and save the deployment cache after running the deployments. Use it if you have added new deployments or tests and want to update the deployment cache.


## Databases
The project uses two databases: a vector database as well as a tradtional SQL database,
referred to internally as vectorstore and datastore, respectively.
Expand Down
31 changes: 3 additions & 28 deletions aana/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,40 +81,15 @@ def custom_exception_handler(request: Request | None, exc_raw: Exception):
)


@app.exception_handler(BaseException)
async def pipeline_exception_handler(request: Request, exc: BaseException):
@app.exception_handler(Exception)
async def pipeline_exception_handler(request: Request, exc: Exception):
"""This handler is used to handle exceptions raised by the Mobius Pipeline and Aana application.
Args:
request (Request): The request object
exc (BaseException): The exception raised
exc (Exception): The exception raised
Returns:
JSONResponse: JSON response with the error details
"""
return custom_exception_handler(request, exc)


@app.exception_handler(RayTaskError)
async def ray_task_error_handler(request: Request, exc: RayTaskError):
"""This handler is used to handle RayTaskError exceptions.
Args:
request (Request): The request object
exc (RayTaskError): The exception raised
Returns:
JSONResponse: JSON response with the error details. The response contains the following fields:
error: The name of the exception class.
message: The message of the exception.
stacktrace: The stacktrace of the exception.
"""
error = exc.__class__.__name__
stacktrace = traceback.format_exc()

return AanaJSONResponse(
status_code=400,
content=ExceptionResponseModel(
error=error, message=str(exc), stacktrace=stacktrace
).dict(),
)
29 changes: 1 addition & 28 deletions aana/api/responses.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,9 @@
from pathlib import Path
from typing import Any

import orjson
from fastapi.responses import JSONResponse
from pydantic import BaseModel


def json_serializer_default(obj: Any) -> Any:
"""Default function for json serializer to handle pydantic models.
If json serializer does not know how to serialize an object, it calls the default function.
If we see that the object is a pydantic model,
we call the dict method to get the dictionary representation of the model
that json serializer can deal with.
If the object is not a pydantic model, we raise a TypeError.
Args:
obj (Any): The object to serialize.
Returns:
Any: The serializable object.
Raises:
TypeError: If the object is not a pydantic model.
"""
if isinstance(obj, BaseModel):
return obj.dict()
if isinstance(obj, Path):
return str(obj)
raise TypeError
from aana.utils.json import json_serializer_default


class AanaJSONResponse(JSONResponse):
Expand Down
14 changes: 14 additions & 0 deletions aana/configs/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,17 @@ def run_alembic_migrations(settings):

alembic_config = get_alembic_config(settings, ini_file_path, alembic_data_path)
command.upgrade(alembic_config, "head")


def drop_all_tables(settings):
"""Drops all tables in the database."""
# TODO: only allow this in testing mode
current_path = Path(__file__)
aana_root = current_path.parent.parent # go up two directories
if aana_root.name != "aana": # we are not in the right place
raise RuntimeError("Not in right directory, exiting.") # noqa: TRY003
ini_file_path = aana_root / "alembic.ini"
alembic_data_path = aana_root / "alembic"

alembic_config = get_alembic_config(settings, ini_file_path, alembic_data_path)
command.downgrade(alembic_config, "base")
90 changes: 8 additions & 82 deletions aana/configs/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,6 @@
],
),
],
"video": [
Endpoint(
name="video_extract_frames",
path="/video/extract_frames",
summary="Extract frames from a video",
outputs=[
EndpointOutput(name="timestamps", output="timestamps"),
EndpointOutput(name="duration", output="duration"),
],
)
],
"whisper": [
Endpoint(
name="whisper_transcribe",
Expand Down Expand Up @@ -124,8 +113,6 @@
),
],
),
],
"chat_with_video_test": [
Endpoint(
name="delete_media_id",
path="/video/delete",
Expand All @@ -136,54 +123,6 @@
),
],
"chat_with_video": [
Endpoint(
name="delete_media_id",
path="/video/delete",
summary="Delete a video",
outputs=[
EndpointOutput(name="deleted_media_id", output="deleted_media_id")
],
),
Endpoint(
name="blip2_video_generate",
path="/video/generate_captions",
summary="Generate captions for videos using BLIP2 OPT-2.7B",
outputs=[
EndpointOutput(
name="captions",
output="video_captions_hf_blip2_opt_2_7b",
streaming=True,
),
EndpointOutput(
name="timestamps", output="video_timestamps", streaming=True
),
],
streaming=True,
),
Endpoint(
name="whisper_transcribe",
path="/video/transcribe",
summary="Transcribe a video using Whisper Medium",
outputs=[
EndpointOutput(
name="transcription",
output="video_transcriptions_whisper_medium",
streaming=True,
),
EndpointOutput(
name="segments",
output="video_transcriptions_segments_whisper_medium",
streaming=True,
),
EndpointOutput(
name="info",
output="video_transcriptions_info_whisper_medium",
streaming=True,
),
EndpointOutput(name="transcription_id", output="transcription_id"),
],
streaming=True,
),
Endpoint(
name="index_video_stream",
path="/video/index_stream",
Expand Down Expand Up @@ -217,27 +156,6 @@
],
streaming=True,
),
Endpoint(
name="llm_generate",
path="/llm/generate",
summary="Generate text using LLaMa2 7B Chat",
outputs=[
EndpointOutput(name="completion", output="vllm_llama2_7b_chat_output")
],
),
Endpoint(
name="llm_generate_stream",
path="/llm/generate_stream",
summary="Generate text using LLaMa2 7B Chat (streaming)",
outputs=[
EndpointOutput(
name="completion",
output="vllm_llama2_7b_chat_output_stream",
streaming=True,
)
],
streaming=True,
),
Endpoint(
name="video_chat_stream",
path="/video/chat_stream",
Expand All @@ -259,5 +177,13 @@
EndpointOutput(name="metadata", output="video_metadata"),
],
),
Endpoint(
name="delete_media_id",
path="/video/delete",
summary="Delete a video",
outputs=[
EndpointOutput(name="deleted_media_id", output="deleted_media_id")
],
),
],
}
10 changes: 10 additions & 0 deletions aana/configs/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
from aana.configs.db import DBConfig


class TestSettings(BaseSettings):
"""A pydantic model for test settings."""

test_mode: bool = False
use_deployment_cache: bool = False # use cached deployment results for testing
save_deployment_cache: bool = False # save deployment results to cache for testing


class Settings(BaseSettings):
"""A pydantic model for SDK settings."""

Expand All @@ -17,5 +25,7 @@ class Settings(BaseSettings):
"datastore_config": {"path": Path("/var/lib/aana_data")},
}

test: TestSettings = TestSettings()


settings = Settings()
12 changes: 10 additions & 2 deletions aana/deployments/base_deployment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any

from aana.configs.settings import settings


class BaseDeployment:
"""Base class for all deployments.
Expand All @@ -19,8 +21,14 @@ async def reconfigure(self, config: dict[str, Any]):
The method is called when the deployment is updated.
"""
self.config = config
await self.apply_config(config)
self.configured = True
if settings.test.test_mode and settings.test.use_deployment_cache:
# If we are in testing mode and we want to use the cache,
# we don't need to load the model
self.configured = True
return
else:
await self.apply_config(config)
self.configured = True

async def apply_config(self, config: dict[str, Any]):
"""Apply the configuration.
Expand Down
6 changes: 6 additions & 0 deletions aana/deployments/hf_blip2_deployment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, TypedDict

import torch
import transformers
from pydantic import BaseModel, Field
from ray import serve
from transformers import Blip2ForConditionalGeneration, Blip2Processor
Expand All @@ -10,6 +11,7 @@
from aana.models.core.dtype import Dtype
from aana.models.core.image import Image
from aana.utils.batch_processor import BatchProcessor
from aana.utils.test import test_cache


class HFBlip2Config(BaseModel):
Expand Down Expand Up @@ -97,6 +99,7 @@ async def apply_config(self, config: dict[str, Any]):
self.processor = Blip2Processor.from_pretrained(self.model_id)
self.model.to(self.device)

@test_cache
async def generate(self, image: Image) -> CaptioningOutput:
"""Generate captions for the given image.
Expand All @@ -115,6 +118,7 @@ async def generate(self, image: Image) -> CaptioningOutput:
)
return CaptioningOutput(caption=captions["captions"][0])

@test_cache
async def generate_batch(self, **kwargs) -> CaptioningBatchOutput:
"""Generate captions for the given images.
Expand Down Expand Up @@ -149,6 +153,8 @@ def _generate(self, images: list[Image]) -> CaptioningBatchOutput:
Raises:
InferenceException: if the inference fails
"""
# Set the seed to make the results reproducible
transformers.set_seed(42)
# Loading images
numpy_images = [im.get_numpy() for im in images]
inputs = self.processor(numpy_images, return_tensors="pt").to(
Expand Down
Loading

0 comments on commit f852f42

Please sign in to comment.