diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 0f771751..ad8c706b 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.53"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]})","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":" Embeddings Image","description":"Encode Embeddings\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"http://images.cocodataset.org/val2017/000000039769.jpg\"]})","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","minLength":1,"format":"uri"},"type":"array","maxItems":2048,"minItems":1},{"type":"string","minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"OpenAIEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult"},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"items":{"type":"number"},"type":"array","title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.57"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]})","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":" Embeddings Image","description":"Encode Embeddings from Image files\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"http://images.cocodataset.org/val2017/000000039769.jpg\"]})","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_audio":{"post":{"summary":" Embeddings Audio","description":"Encode Embeddings from Audio files\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\"model\":\"laion/larger_clap_general\",\"input\":[\"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"]})","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","minLength":1,"format":"uri"},"type":"array","maxItems":32,"minItems":1},{"type":"string","minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","minLength":1,"format":"uri"},"type":"array","maxItems":32,"minItems":1},{"type":"string","minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"OpenAIEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/libs/embed_package/embed/_infer.py b/libs/embed_package/embed/_infer.py index b04a6856..a4a80160 100644 --- a/libs/embed_package/embed/_infer.py +++ b/libs/embed_package/embed/_infer.py @@ -111,7 +111,7 @@ def audio_embed( """Embed audios with a model. >>> import requests, io - >>> url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav" + >>> url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" >>> ei = BatchedInference(model_id="laion/larger_clap_general", engine="torch") >>> audio_embed_result = ei.audio_embed(model_id="laion/larger_clap_general", audios=[url]) >>> type(audio_embed_result) diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index 72736375..0a71c91f 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -3,7 +3,7 @@ # Default target executed when no arguments are given to make. all: help -precommit : | format spell_fix spell_check lint poetry_check test cli_v2_docs +precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs test ###################### # TESTING AND COVERAGE diff --git a/libs/infinity_emb/infinity_emb/engine.py b/libs/infinity_emb/infinity_emb/engine.py index 9bb9565e..80af903d 100644 --- a/libs/infinity_emb/infinity_emb/engine.py +++ b/libs/infinity_emb/infinity_emb/engine.py @@ -231,7 +231,7 @@ async def audio_embed( """embed multiple audios Kwargs: - audios (list[npt.NDArray]): list of audio data, to be embedded + audios (list[Union[str, Audiobytes]]): list of audio data, to be embedded Raises: ValueError: raised if engine is not started yet @@ -240,7 +240,7 @@ async def audio_embed( Returns: list[EmbeddingReturnType]: embeddings - 2D list-array of shape( len(sentences),embed_dim ) + 2D list-array of shape( len(sentences), embed_dim ) int: token usage """ diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index aba79cf4..3852b497 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -33,6 +33,10 @@ "min_length": 1, "max_length": 2048, } + ITEMS_LIMIT_SMALL = { + "min_length": 1, + "max_length": 32, + } except ImportError: from pydantic import constr @@ -41,6 +45,10 @@ "min_items": 1, "max_items": 2048, } + ITEMS_LIMIT_SMALL = { + "min_items": 1, + "max_items": 32, + } HttpUrl, AnyUrl = str, str # type: ignore else: @@ -76,7 +84,7 @@ class ImageEmbeddingInput(BaseModel): input: Union[ # type: ignore conlist( # type: ignore Annotated[AnyUrl, HttpUrl], - **ITEMS_LIMIT, + **ITEMS_LIMIT_SMALL, ), Annotated[AnyUrl, HttpUrl], ] @@ -85,6 +93,10 @@ class ImageEmbeddingInput(BaseModel): user: Optional[str] = None +class AudioEmbeddingInput(ImageEmbeddingInput): + pass + + class _EmbeddingObject(BaseModel): object: Literal["embedding"] = "embedding" embedding: Union[list[float], bytes] diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 7da2bb4d..73bb5996 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -16,6 +16,7 @@ from infinity_emb.env import MANAGER from infinity_emb.fastapi_schemas import docs, errors from infinity_emb.fastapi_schemas.pymodels import ( + AudioEmbeddingInput, ClassifyInput, ClassifyResult, ImageEmbeddingInput, @@ -27,6 +28,7 @@ ) from infinity_emb.log_handler import UVICORN_LOG_LEVELS, logger from infinity_emb.primitives import ( + AudioCorruption, Device, Dtype, EmbeddingDtype, @@ -349,7 +351,7 @@ async def _classify(data: ClassifyInput): operation_id="embeddings_image", ) async def _embeddings_image(data: ImageEmbeddingInput): - """Encode Embeddings + """Encode Embeddings from Image files ```python import requests @@ -384,7 +386,59 @@ async def _embeddings_image(data: ImageEmbeddingInput): ) except ModelNotDeployedError as ex: raise errors.OpenAIException( - f"ModelNotDeployedError: model=`{data.model}` does not support `embed`. Reason: {ex}", + f"ModelNotDeployedError: model=`{data.model}` does not support `image_embed`. Reason: {ex}", + code=status.HTTP_400_BAD_REQUEST, + ) + except Exception as ex: + raise errors.OpenAIException( + f"InternalServerError: {ex}", + code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @app.post( + f"{url_prefix}/embeddings_audio", + response_model=OpenAIEmbeddingResult, + response_class=responses.ORJSONResponse, + dependencies=route_dependencies, + operation_id="embeddings_audio", + ) + async def _embeddings_audio(data: AudioEmbeddingInput): + """Encode Embeddings from Audio files + + ```python + import requests + requests.post("http://..:7997/embeddings_audio", + json={"model":"laion/larger_clap_general","input":["https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"]}) + """ + engine = _resolve_engine(data.model) + if hasattr(data.input, "host"): + # if it is a single url + audio_inputs = [str(data.input)] + else: + audio_inputs = [str(d) for d in data.input] # type: ignore + try: + logger.debug("[📝] Received request with %s Urls ", len(audio_inputs)) + start = time.perf_counter() + + embedding, usage = await engine.audio_embed(audios=audio_inputs) # type: ignore + + duration = (time.perf_counter() - start) * 1000 + logger.debug("[✅] Done in %s ms", duration) + + return OpenAIEmbeddingResult.to_embeddings_response( + embeddings=embedding, + engine_args=engine.engine_args, + encoding_format=data.encoding_format, + usage=usage, + ) + except AudioCorruption as ex: + raise errors.OpenAIException( + f"AudioCorruption, could not open {audio_inputs} -> {ex}", + code=status.HTTP_400_BAD_REQUEST, + ) + except ModelNotDeployedError as ex: + raise errors.OpenAIException( + f"ModelNotDeployedError: model=`{data.model}` does not support `audio_embed`. Reason: {ex}", code=status.HTTP_400_BAD_REQUEST, ) except Exception as ex: diff --git a/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav b/libs/infinity_emb/tests/data/audio/beep.wav similarity index 100% rename from libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav rename to libs/infinity_emb/tests/data/audio/beep.wav diff --git a/libs/infinity_emb/tests/data/audio/ANMLCat_Smallmewingofacat(ID0098)_BSB.wav b/libs/infinity_emb/tests/data/audio/cat_meow.wav similarity index 100% rename from libs/infinity_emb/tests/data/audio/ANMLCat_Smallmewingofacat(ID0098)_BSB.wav rename to libs/infinity_emb/tests/data/audio/cat_meow.wav diff --git a/libs/infinity_emb/tests/unit_test/test_engine.py b/libs/infinity_emb/tests/unit_test/test_engine.py index 06ad425a..25b9687c 100644 --- a/libs/infinity_emb/tests/unit_test/test_engine.py +++ b/libs/infinity_emb/tests/unit_test/test_engine.py @@ -192,7 +192,7 @@ async def test_clap_like_model(): engine = AsyncEmbeddingEngine.from_args( EngineArgs(model_name_or_path=model_name, dtype="float32") ) - url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav" + url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" bytes_url = requests.get(url).content inputs = ["a sound of a cat", "a sound of a cat"] @@ -211,7 +211,7 @@ async def test_clap_like_model(): @pytest.mark.anyio async def test_clip_embed_pil_image_input(): - img_url = "https://github.com/michaelfeil/infinity/raw/65afe2b3d68fda10429bf7f215fe645be20788e4/docs/assets/cats_coco_sample.jpg" + img_url = "https://github.com/michaelfeil/infinity/raw/06fd1f4d8f0a869f4482fc1c78b62a75ccbb66a1/docs/assets/cats_coco_sample.jpg" response = requests.get(img_url, stream=True) assert response.status_code == 200 img_data = response.raw diff --git a/libs/infinity_emb/tests/unit_test/transformer/audio/test_audio.py b/libs/infinity_emb/tests/unit_test/transformer/audio/test_audio.py index 7f1fda1d..65235887 100644 --- a/libs/infinity_emb/tests/unit_test/transformer/audio/test_audio.py +++ b/libs/infinity_emb/tests/unit_test/transformer/audio/test_audio.py @@ -15,7 +15,7 @@ def test_clap_like_model(): model = ClapLikeModel( engine_args=EngineArgs(model_name_or_path=model_name, dtype="float16") ) - url = "https://github.com/michaelfeil/infinity/raw/refs/heads/main/libs/infinity_emb/tests/data/audio/COMTran_Aerospacebeep1(ID2380)_BSB.wav" + url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" raw_bytes = requests.get(url, stream=True).content data, samplerate = sf.read(io.BytesIO(raw_bytes))