Skip to content

Commit

Permalink
Audio: OpenAI API (#377)
Browse files Browse the repository at this point in the history
* async requests enabled

* aiohttp checks

* fmt

* update audio endpoint

* update: route

* rename cat sound

* fix: docstring

* update openapi

* move jpg image
  • Loading branch information
michaelfeil authored Sep 24, 2024
1 parent 8690ebc commit 978757d
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/assets/openapi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libs/embed_package/embed/_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def audio_embed(
"""Embed audios with a model.
>>> import requests, io
>>> url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav"
>>> url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"
>>> ei = BatchedInference(model_id="laion/larger_clap_general", engine="torch")
>>> audio_embed_result = ei.audio_embed(model_id="laion/larger_clap_general", audios=[url])
>>> type(audio_embed_result)
Expand Down
2 changes: 1 addition & 1 deletion libs/infinity_emb/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Default target executed when no arguments are given to make.
all: help

precommit : | format spell_fix spell_check lint poetry_check test cli_v2_docs
precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs test

######################
# TESTING AND COVERAGE
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/infinity_emb/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ async def audio_embed(
"""embed multiple audios
Kwargs:
audios (list[npt.NDArray]): list of audio data, to be embedded
audios (list[Union[str, Audiobytes]]): list of audio data, to be embedded
Raises:
ValueError: raised if engine is not started yet
Expand All @@ -240,7 +240,7 @@ async def audio_embed(
Returns:
list[EmbeddingReturnType]: embeddings
2D list-array of shape( len(sentences),embed_dim )
2D list-array of shape( len(audios), embed_dim )
int: token usage
"""

Expand Down
14 changes: 13 additions & 1 deletion libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
"min_length": 1,
"max_length": 2048,
}
ITEMS_LIMIT_SMALL = {
"min_length": 1,
"max_length": 32,
}
except ImportError:
from pydantic import constr

Expand All @@ -41,6 +45,10 @@
"min_items": 1,
"max_items": 2048,
}
ITEMS_LIMIT_SMALL = {
"min_items": 1,
"max_items": 32,
}
HttpUrl, AnyUrl = str, str # type: ignore
else:

Expand Down Expand Up @@ -76,7 +84,7 @@ class ImageEmbeddingInput(BaseModel):
input: Union[ # type: ignore
conlist( # type: ignore
Annotated[AnyUrl, HttpUrl],
**ITEMS_LIMIT,
**ITEMS_LIMIT_SMALL,
),
Annotated[AnyUrl, HttpUrl],
]
Expand All @@ -85,6 +93,10 @@ class ImageEmbeddingInput(BaseModel):
user: Optional[str] = None


# Request schema for the `/embeddings_audio` route. It adds no fields of its
# own: everything (including the `input` URL or list of URLs, whose list form
# is bounded by ITEMS_LIMIT_SMALL) is inherited from ImageEmbeddingInput.
# A separate class name gives the audio route its own request model.
class AudioEmbeddingInput(ImageEmbeddingInput):
pass


class _EmbeddingObject(BaseModel):
object: Literal["embedding"] = "embedding"
embedding: Union[list[float], bytes]
Expand Down
58 changes: 56 additions & 2 deletions libs/infinity_emb/infinity_emb/infinity_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from infinity_emb.env import MANAGER
from infinity_emb.fastapi_schemas import docs, errors
from infinity_emb.fastapi_schemas.pymodels import (
AudioEmbeddingInput,
ClassifyInput,
ClassifyResult,
ImageEmbeddingInput,
Expand All @@ -27,6 +28,7 @@
)
from infinity_emb.log_handler import UVICORN_LOG_LEVELS, logger
from infinity_emb.primitives import (
AudioCorruption,
Device,
Dtype,
EmbeddingDtype,
Expand Down Expand Up @@ -349,7 +351,7 @@ async def _classify(data: ClassifyInput):
operation_id="embeddings_image",
)
async def _embeddings_image(data: ImageEmbeddingInput):
"""Encode Embeddings
"""Encode Embeddings from Image files
```python
import requests
Expand Down Expand Up @@ -384,7 +386,59 @@ async def _embeddings_image(data: ImageEmbeddingInput):
)
except ModelNotDeployedError as ex:
raise errors.OpenAIException(
f"ModelNotDeployedError: model=`{data.model}` does not support `embed`. Reason: {ex}",
f"ModelNotDeployedError: model=`{data.model}` does not support `image_embed`. Reason: {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except Exception as ex:
raise errors.OpenAIException(
f"InternalServerError: {ex}",
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

@app.post(
f"{url_prefix}/embeddings_audio",
response_model=OpenAIEmbeddingResult,
response_class=responses.ORJSONResponse,
dependencies=route_dependencies,
operation_id="embeddings_audio",
)
async def _embeddings_audio(data: AudioEmbeddingInput):
"""Encode Embeddings from Audio files
```python
import requests
requests.post("http://..:7997/embeddings_audio",
json={"model":"laion/larger_clap_general","input":["https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"]})
```
"""
engine = _resolve_engine(data.model)
if hasattr(data.input, "host"):
# if it is a single url
audio_inputs = [str(data.input)]
else:
audio_inputs = [str(d) for d in data.input] # type: ignore
try:
logger.debug("[📝] Received request with %s Urls ", len(audio_inputs))
start = time.perf_counter()

embedding, usage = await engine.audio_embed(audios=audio_inputs) # type: ignore

duration = (time.perf_counter() - start) * 1000
logger.debug("[✅] Done in %s ms", duration)

return OpenAIEmbeddingResult.to_embeddings_response(
embeddings=embedding,
engine_args=engine.engine_args,
encoding_format=data.encoding_format,
usage=usage,
)
except AudioCorruption as ex:
raise errors.OpenAIException(
f"AudioCorruption, could not open {audio_inputs} -> {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except ModelNotDeployedError as ex:
raise errors.OpenAIException(
f"ModelNotDeployedError: model=`{data.model}` does not support `audio_embed`. Reason: {ex}",
code=status.HTTP_400_BAD_REQUEST,
)
except Exception as ex:
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions libs/infinity_emb/tests/unit_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ async def test_clap_like_model():
engine = AsyncEmbeddingEngine.from_args(
EngineArgs(model_name_or_path=model_name, dtype="float32")
)
url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav"
url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"
bytes_url = requests.get(url).content

inputs = ["a sound of a cat", "a sound of a cat"]
Expand All @@ -211,7 +211,7 @@ async def test_clap_like_model():

@pytest.mark.anyio
async def test_clip_embed_pil_image_input():
img_url = "https://github.com/michaelfeil/infinity/raw/65afe2b3d68fda10429bf7f215fe645be20788e4/docs/assets/cats_coco_sample.jpg"
img_url = "https://github.com/michaelfeil/infinity/raw/06fd1f4d8f0a869f4482fc1c78b62a75ccbb66a1/docs/assets/cats_coco_sample.jpg"
response = requests.get(img_url, stream=True)
assert response.status_code == 200
img_data = response.raw
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_clap_like_model():
model = ClapLikeModel(
engine_args=EngineArgs(model_name_or_path=model_name, dtype="float16")
)
url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav"
url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"
raw_bytes = requests.get(url, stream=True).content
data, samplerate = sf.read(io.BytesIO(raw_bytes))

Expand Down

0 comments on commit 978757d

Please sign in to comment.