From 2ff39c6f1bc74101f0dec03308a674a930a64873 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Fri, 4 Oct 2024 23:33:14 -0700 Subject: [PATCH] Embed openai broad multimodal compat (#395) * initial commit * add openapi * fix openapi * add poetry lock * reiterate on Modality dtype * update Makefile and run_test.sh scripts * reiterate on CLI --- docs/assets/create_cli_v2_docs.sh | 33 ++ .../assets/create_openapi_with_server_hook.sh | 35 ++ docs/assets/openapi.json | 2 +- docs/docs/cli_v2.md | 186 ++++++++-- libs/client_infinity/Makefile | 7 +- .../infinity_client/api/default/embeddings.py | 346 ++++++++++++++++-- .../api/default/embeddings_audio.py | 16 +- .../api/default/embeddings_image.py | 16 +- .../infinity_client/models/__init__.py | 14 +- .../models/audio_embedding_input.py | 3 +- .../models/image_embedding_input.py | 3 +- .../models/open_ai_embedding_input_audio.py | 158 ++++++++ ...ing_input_audio_infinity_extra_modality.py | 8 + .../models/open_ai_embedding_input_image.py | 158 ++++++++ ...ing_input_image_infinity_extra_modality.py | 8 + ...put.py => open_ai_embedding_input_text.py} | 33 +- ...ding_input_text_infinity_extra_modality.py | 8 + .../infinity_client/poetry.lock | 190 ---------- .../infinity_client/pyproject.toml | 2 +- .../client_infinity/run_generate_with_hook.sh | 38 ++ libs/client_infinity/run_tests_with_hook.sh | 33 +- libs/infinity_emb/Makefile | 16 +- .../infinity_emb/fastapi_schemas/pymodels.py | 88 ++++- .../infinity_emb/inference/batch_handler.py | 20 +- .../infinity_emb/infinity_server.py | 136 ++++++- libs/infinity_emb/infinity_emb/primitives.py | 6 + .../infinity_emb/transformer/audio/utils.py | 8 +- libs/infinity_emb/poetry.lock | 217 ++++++++--- libs/infinity_emb/pyproject.toml | 5 +- libs/infinity_emb/tests/conftest.py | 2 +- .../end_to_end/test_api_with_dummymodel.py | 33 +- .../end_to_end/test_openapi_client_compat.py | 160 ++++++++ .../end_to_end/test_optimum_embedding.py | 4 +- .../end_to_end/test_sentence_transformers.py | 2 +- .../tests/end_to_end/test_torch_audio.py | 70 ++-- .../tests/end_to_end/test_torch_reranker.py | 2 +- .../tests/end_to_end/test_torch_vision.py | 104 +++--- 37 files changed, 1672 insertions(+), 498 deletions(-) create mode 100755 docs/assets/create_cli_v2_docs.sh create mode 100755 docs/assets/create_openapi_with_server_hook.sh create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py rename libs/client_infinity/infinity_client/infinity_client/models/{open_ai_embedding_input.py => open_ai_embedding_input_text.py} (70%) create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py delete mode 100644 libs/client_infinity/infinity_client/poetry.lock create mode 100755 libs/client_infinity/run_generate_with_hook.sh create mode 100644 libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py diff --git a/docs/assets/create_cli_v2_docs.sh b/docs/assets/create_cli_v2_docs.sh new file mode 100755 index 00000000..b59825d1 --- /dev/null +++ b/docs/assets/create_cli_v2_docs.sh
@@ -0,0 +1,33 @@ +#!/bin/bash + +echo 'Generating CLI v2 documentation...' + +# Get the directory of the script +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Define the output file path relative to the script's location +OUTPUT_FILE="$SCRIPT_DIR/../docs/cli_v2.md" + +# Ensure the output directory exists +mkdir -p "$(dirname "$OUTPUT_FILE")" + +# Write the static content to the output file +cat << EOF > "$OUTPUT_FILE" +# CLI v2 Documentation + +The current version of Infinity uses the following arguments in its CLI: +\`\`\`bash +\$ infinity_emb v2 --help +\`\`\` + +\`\`\` +EOF + +# Append the help output to the file, setting TERMINAL_WIDTH=120 only for this command +TERMINAL_WIDTH=120 poetry run infinity_emb v2 --help >> "$OUTPUT_FILE" 2>&1 + +# Close the code block in the markdown file +echo '```' >> "$OUTPUT_FILE" +echo 'Note: This doc is auto-generated. Do not edit this file directly.' >> "$OUTPUT_FILE" + +echo "CLI v2 documentation generated and saved to $OUTPUT_FILE." \ No newline at end of file diff --git a/docs/assets/create_openapi_with_server_hook.sh b/docs/assets/create_openapi_with_server_hook.sh new file mode 100755 index 00000000..b1ca5d95 --- /dev/null +++ b/docs/assets/create_openapi_with_server_hook.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Function to handle cleanup +cleanup() { + echo "Cleaning up..." + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi +} + +# Set up the trap to run the cleanup function on EXIT or any error +trap cleanup EXIT + +# Start infinity_emb in the background +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done + +# Download the openapi.json +wget http://0.0.0.0:7997/openapi.json -O "$SCRIPT_DIR/openapi.json" \ No newline at end of file diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 6a838fa5..5dcfab07 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip.
Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.58"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]})\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":" Embeddings Image","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n 
})\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_audio":{"post":{"summary":" Embeddings Audio","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw 
Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned 
By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"OpenAIEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance 
Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.59"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports multimodal inputs.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"infinity_extra_modality\": \"image\"\n })\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"infinity_extra_modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format= \"base64\",\n extra_body={\n \"infinity_extra_modality\": \"audio\"\n 
}\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format= \"base64\",\n extra_body={\n \"infinity_extra_modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport 
requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["audio"],"const":"audio","title":"Infinity Extra 
Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["image"],"const":"image","title":"Infinity Extra Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["text"],"const":"text","title":"Infinity Extra Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index 9d020818..1d78f73a 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -1,46 +1,152 @@ # CLI v2 Documentation The current version of Infinity uses the following arguments in its CLI: -Note: The section below is auto-generated by the makefile. - ```bash -infinity_emb v2 --help - - Usage: infinity_emb v2 [OPTIONS] - - Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil - Multiple Model CLI Playbook: - - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` - - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;" - - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8. - -╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --model-id TEXT Huggingface model repo id. 
Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference& [env var: `INFINITY_MODEL_ID`] [default: michaelfeil/bge-small-en-v1.5] │ -│ --served-model-name TEXT the nickname for the API, under which the model_id can be selected [env var: `INFINITY_SERVED_MODEL_NAME`] │ -│ --batch-size INTEGER maximum batch size for inference [env var: `INFINITY_BATCH_SIZE`] [default: 32] │ -│ --revision TEXT huggingface model repo revision. [env var: `INFINITY_REVISION`] │ -│ --trust-remote-code --no-trust-remote-code if potential remote modeling code from huggingface repo is trusted. [env var: `INFINITY_TRUST_REMOTE_CODE`] [default: trust-remote-code] │ -│ --engine [torch|ctranslate2|optimum|debugengine] Which backend to use. `torch` uses Pytorch GPU/CPU, optimum uses ONNX on GPU/CPU/NVIDIA-TensorRT, `CTranslate2` uses torch+ctranslate2 on CPU/GPU. [env var: `INFINITY_ENGINE`] │ -│ [default: torch] │ -│ --model-warmup --no-model-warmup if model should be warmed up after startup, and before ready. [env var: `INFINITY_MODEL_WARMUP`] [default: model-warmup] │ -│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be cached to SQLite for latency improvement. [env var: `INFINITY_VECTOR_DISK_CACHE`] [default: vector-disk-cache] │ -│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the model forward pass. [env var: `INFINITY_DEVICE`] [default: auto] │ -│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based on actual tokenizer count. If false, uses len(input) as proxy. [env var: `INFINITY_LENGTHS_VIA_TOKENIZE`] [default: lengths-via-tokenize] │ -│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. [env var: `INFINITY_DTYPE`] [default: auto] │ -│ --embedding-dtype [float32|int8|uint8|binary|ubinary] dtype post-forward pass. If != `float32`, using Post-Forward Static quantization. [env var: `INFINITY_EMBEDDING_DTYPE`] [default: float32] │ -│ --pooling-method [mean|cls|auto] overwrite the pooling method if inferred incorrectly. [env var: `INFINITY_POOLING_METHOD`] [default: auto] │ -│ --compile --no-compile Enable usage of `torch.compile(dynamic=True)` if engine relies on it. [env var: `INFINITY_COMPILE`] [default: compile] │ -│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 via the `BetterTransformer` implementation. If available for this model. [env var: `INFINITY_BETTERTRANSFORMER`] [default: bettertransformer] │ -│ --preload-only --no-preload-only If true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile. [env var: `INFINITY_PRELOAD_ONLY`] [default: no-preload-only] │ -│ --host TEXT host for the FastAPI uvicorn server [env var: `INFINITY_HOST`] [default: 0.0.0.0] │ -│ --port INTEGER port for the FastAPI uvicorn server [env var: `INFINITY_PORT`] [default: 7997] │ -│ --url-prefix TEXT prefix for all routes of the FastAPI uvicorn server. Useful if you run behind a proxy / cascaded API. [env var: `INFINITY_URL_PREFIX`] │ -│ --redirect-slash TEXT where to redirect `/` requests to. [env var: `INFINITY_REDIRECT_SLASH`] [default: /docs] │ -│ --log-level [critical|error|warning|info|debug|trace] console log level. [env var: `INFINITY_LOG_LEVEL`] [default: info] │ -│ --permissive-cors --no-permissive-cors whether to allow permissive cors. [env var: `INFINITY_PERMISSIVE_CORS`] [default: no-permissive-cors] │ -│ --api-key TEXT api_key used for authentication headers. 
[env var: `INFINITY_API_KEY`] │ -│ --proxy-root-path TEXT Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/ [env var: `INFINITY_PROXY_ROOT_PATH`] │ -│ --help Show this message and exit. │ -╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +$ infinity_emb v2 --help +``` + +``` + + Usage: infinity_emb v2 [OPTIONS] + + Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil + Multiple Model CLI Playbook: + - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` + - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && + INFINITY_BATCH_SIZE="8;4;" + - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size + 8` both models have batch-size 8. + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --model-id TEXT Huggingface model repo id. │ +│ Subset of possible models: │ +│ https://huggingface.co/models… │ +│ [env var: `INFINITY_MODEL_ID`] │ +│ [default: │ +│ michaelfeil/bge-small-en-v1.5] │ +│ --served-model-name TEXT the nickname for the API, │ +│ under which the model_id can │ +│ be selected │ +│ [env var: │ +│ `INFINITY_SERVED_MODEL_NAME`] │ +│ --batch-size INTEGER maximum batch size for │ +│ inference │ +│ [env var: │ +│ `INFINITY_BATCH_SIZE`] │ +│ [default: 32] │ +│ --revision TEXT huggingface model repo │ +│ revision. │ +│ [env var: `INFINITY_REVISION`] │ +│ --trust-remote-code --no-trust-remote-code if potential remote modeling │ +│ code from huggingface repo is │ +│ trusted. │ +│ [env var: │ +│ `INFINITY_TRUST_REMOTE_CODE`] │ +│ [default: trust-remote-code] │ +│ --engine [torch|ctranslate2|optimum|de Which backend to use. `torch` │ +│ bugengine] uses Pytorch GPU/CPU, optimum │ +│ uses ONNX on │ +│ GPU/CPU/NVIDIA-TensorRT, │ +│ `CTranslate2` uses │ +│ torch+ctranslate2 on CPU/GPU. │ +│ [env var: `INFINITY_ENGINE`] │ +│ [default: torch] │ +│ --model-warmup --no-model-warmup if model should be warmed up │ +│ after startup, and before │ +│ ready. │ +│ [env var: │ +│ `INFINITY_MODEL_WARMUP`] │ +│ [default: model-warmup] │ +│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results │ +│ should be cached to SQLite for │ +│ latency improvement. │ +│ [env var: │ +│ `INFINITY_VECTOR_DISK_CACHE`] │ +│ [default: vector-disk-cache] │ +│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing │ +│ the model forward pass. │ +│ [env var: `INFINITY_DEVICE`] │ +│ [default: auto] │ +│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is │ +│ based on actual tokenizer │ +│ count. If false, uses │ +│ len(input) as proxy. │ +│ [env var: │ +│ `INFINITY_LENGTHS_VIA_TOKENIZ… │ +│ [default: │ +│ lengths-via-tokenize] │ +│ --dtype [float32|float16|int8|fp8|aut dtype for the model weights. │ +│ o] [env var: `INFINITY_DTYPE`] │ +│ [default: auto] │ +│ --embedding-dtype [float32|int8|uint8|binary|ub dtype post-forward pass. If != │ +│ inary] `float32`, using Post-Forward │ +│ Static quantization. 
│ +│ [env var: │ +│ `INFINITY_EMBEDDING_DTYPE`] │ +│ [default: float32] │ +│ --pooling-method [mean|cls|auto] overwrite the pooling method │ +│ if inferred incorrectly. │ +│ [env var: │ +│ `INFINITY_POOLING_METHOD`] │ +│ [default: auto] │ +│ --compile --no-compile Enable usage of │ +│ `torch.compile(dynamic=True)` │ +│ if engine relies on it. │ +│ [env var: `INFINITY_COMPILE`] │ +│ [default: compile] │ +│ --bettertransformer --no-bettertransformer Enables varlen │ +│ flash-attention-2 via the │ +│ `BetterTransformer` │ +│ implementation. If available │ +│ for this model. │ +│ [env var: │ +│ `INFINITY_BETTERTRANSFORMER`] │ +│ [default: bettertransformer] │ +│ --preload-only --no-preload-only If true, only downloads models │ +│ and verifies setup, then exit. │ +│ Recommended for pre-caching │ +│ the download in a Dockerfile. │ +│ [env var: │ +│ `INFINITY_PRELOAD_ONLY`] │ +│ [default: no-preload-only] │ +│ --host TEXT host for the FastAPI uvicorn │ +│ server │ +│ [env var: `INFINITY_HOST`] │ +│ [default: 0.0.0.0] │ +│ --port INTEGER port for the FastAPI uvicorn │ +│ server │ +│ [env var: `INFINITY_PORT`] │ +│ [default: 7997] │ +│ --url-prefix TEXT prefix for all routes of the │ +│ FastAPI uvicorn server. Useful │ +│ if you run behind a proxy / │ +│ cascaded API. │ +│ [env var: │ +│ `INFINITY_URL_PREFIX`] │ +│ --redirect-slash TEXT where to redirect `/` requests │ +│ to. │ +│ [env var: │ +│ `INFINITY_REDIRECT_SLASH`] │ +│ [default: /docs] │ +│ --log-level [critical|error|warning|info| console log level. │ +│ debug|trace] [env var: │ +│ `INFINITY_LOG_LEVEL`] │ +│ [default: info] │ +│ --permissive-cors --no-permissive-cors whether to allow permissive │ +│ cors. │ +│ [env var: │ +│ `INFINITY_PERMISSIVE_CORS`] │ +│ [default: no-permissive-cors] │ +│ --api-key TEXT api_key used for │ +│ authentication headers. │ +│ [env var: `INFINITY_API_KEY`] │ +│ --proxy-root-path TEXT Proxy prefix for the │ +│ application. See: │ +│ https://fastapi.tiangolo.com/… │ +│ [env var: │ +│ `INFINITY_PROXY_ROOT_PATH`] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` +Note: This doc is auto-generated. Do not edit this file directly. diff --git a/libs/client_infinity/Makefile b/libs/client_infinity/Makefile index 9f29710e..0e0b0b51 100644 --- a/libs/client_infinity/Makefile +++ b/libs/client_infinity/Makefile @@ -1,12 +1,7 @@ .PHONY: generate tests generate: - pip install openapi-python-client==0.21.1 - openapi-python-client generate \ - --url http://0.0.0.0:7997/openapi.json \ - --config client_config.yaml \ - --overwrite \ - --custom-template-path=./template + ./run_generate_with_hook.sh tests: ./run_tests_with_hook.sh diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py index d1e9e995..e2dfd6de 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py @@ -6,14 +6,16 @@ from ... 
import errors from ...client import AuthenticatedClient, Client from ...models.http_validation_error import HTTPValidationError -from ...models.open_ai_embedding_input import OpenAIEmbeddingInput +from ...models.open_ai_embedding_input_audio import OpenAIEmbeddingInputAudio +from ...models.open_ai_embedding_input_image import OpenAIEmbeddingInputImage +from ...models.open_ai_embedding_input_text import OpenAIEmbeddingInputText from ...models.open_ai_embedding_result import OpenAIEmbeddingResult from ...types import Response def _get_kwargs( *, - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Dict[str, Any]: headers: Dict[str, Any] = {} @@ -22,7 +24,13 @@ def _get_kwargs( "url": "/embeddings", } - _body = body.to_dict() + _body: Dict[str, Any] + if isinstance(body, OpenAIEmbeddingInputText): + _body = body.to_dict() + elif isinstance(body, OpenAIEmbeddingInputAudio): + _body = body.to_dict() + else: + _body = body.to_dict() _kwargs["json"] = _body headers["Content-Type"] = "application/json" @@ -62,20 +70,93 @@ def _build_response( def sync_detailed( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + + ## Running Text Embeddings + ```python + import requests, base64 + requests.post(\"http://..:7997/embeddings\", + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + ## Running Image Embeddings ```python - import requests requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound 
of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -99,20 +180,93 @@ def sync( def sync( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an 
undocumented status code and Client.raise_on_unexpected_status is True. @@ -131,20 +285,93 @@ def sync( async def asyncio_detailed( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -166,20 +393,93 @@ async def asyncio_detailed( async def asyncio( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. 
+ ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + } + ) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
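For orientation, here is a minimal sketch of calling the regenerated client module above with the new union body types. The import paths follow the layout that openapi-python-client usually emits (`infinity_client.Client`, `infinity_client.api.default.embeddings`, models re-exported from `infinity_client.models`); treat them as assumptions rather than a verified API surface.

```python
# Hedged sketch: calling the unified `embeddings` route through the generated
# client. Import paths are assumed from openapi-python-client conventions.
from infinity_client import Client
from infinity_client.api.default import embeddings
from infinity_client.models import OpenAIEmbeddingInputText

client = Client(base_url="http://localhost:7997")

# `infinity_extra_modality` defaults to "text" on this model class,
# so a plain text request only needs `input_` and `model`.
body = OpenAIEmbeddingInputText(
    input_=["A sentence to encode."],
    model="BAAI/bge-small-en-v1.5",
)
result = embeddings.sync(client=client, body=body)
print(result)
```

Because each body class carries its modality as a default, choosing the class (`OpenAIEmbeddingInputText`, `OpenAIEmbeddingInputImage`, or `OpenAIEmbeddingInputAudio`) is enough to route the request; no extra field needs to be set.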
diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py index 95e8ed0e..ff2ece66 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py @@ -64,7 +64,7 @@ def sync_detailed( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -84,7 +84,7 @@ def sync_detailed( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -110,7 +110,7 @@ def sync( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -130,7 +130,7 @@ def sync( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -151,7 +151,7 @@ async def asyncio_detailed( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -171,7 +171,7 @@ async def asyncio_detailed( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -195,7 +195,7 @@ async def asyncio( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -215,7 +215,7 @@ async def asyncio( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. 
diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py index 71f36ad7..9bceddb9 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py @@ -64,7 +64,7 @@ def sync_detailed( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -83,7 +83,7 @@ def sync_detailed( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -109,7 +109,7 @@ def sync( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -128,7 +128,7 @@ def sync( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -149,7 +149,7 @@ async def asyncio_detailed( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -168,7 +168,7 @@ async def asyncio_detailed( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -192,7 +192,7 @@ async def asyncio( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -211,7 +211,7 @@ async def asyncio( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. 
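Both legacy routes above stay callable but are now flagged `deprecated` in the OpenAPI schema (see the `deprecated=True` flags added in `infinity_server.py` further down). As a rough migration sketch, assuming a local server on port 7997, the equivalent request against the unified `/embeddings` route differs only in the path and the added `infinity_extra_modality` field:

```python
# Hedged migration sketch from the deprecated modality-specific routes to the
# unified /embeddings route; server address and model names are examples.
import requests

base = "http://localhost:7997"
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Before (deprecated): dedicated image endpoint.
requests.post(f"{base}/embeddings_image",
    json={"model": "openai/clip-vit-base-patch32", "input": [image_url]})

# After: unified endpoint, with the modality made explicit.
requests.post(f"{base}/embeddings",
    json={"model": "openai/clip-vit-base-patch32",
          "input": [image_url],
          "infinity_extra_modality": "image"})

# The audio migration is identical, with "audio" in place of "image".
```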
diff --git a/libs/client_infinity/infinity_client/infinity_client/models/__init__.py b/libs/client_infinity/infinity_client/infinity_client/models/__init__.py index 16ea6139..8a58b644 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/__init__.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/__init__.py @@ -13,7 +13,12 @@ from .model_info import ModelInfo from .model_info_object import ModelInfoObject from .model_info_owned_by import ModelInfoOwnedBy -from .open_ai_embedding_input import OpenAIEmbeddingInput +from .open_ai_embedding_input_audio import OpenAIEmbeddingInputAudio +from .open_ai_embedding_input_audio_infinity_extra_modality import OpenAIEmbeddingInputAudioInfinityExtraModality +from .open_ai_embedding_input_image import OpenAIEmbeddingInputImage +from .open_ai_embedding_input_image_infinity_extra_modality import OpenAIEmbeddingInputImageInfinityExtraModality +from .open_ai_embedding_input_text import OpenAIEmbeddingInputText +from .open_ai_embedding_input_text_infinity_extra_modality import OpenAIEmbeddingInputTextInfinityExtraModality from .open_ai_embedding_result import OpenAIEmbeddingResult from .open_ai_embedding_result_object import OpenAIEmbeddingResultObject from .open_ai_model_info import OpenAIModelInfo @@ -40,7 +45,12 @@ "ModelInfo", "ModelInfoObject", "ModelInfoOwnedBy", - "OpenAIEmbeddingInput", + "OpenAIEmbeddingInputAudio", + "OpenAIEmbeddingInputAudioInfinityExtraModality", + "OpenAIEmbeddingInputImage", + "OpenAIEmbeddingInputImageInfinityExtraModality", + "OpenAIEmbeddingInputText", + "OpenAIEmbeddingInputTextInfinityExtraModality", "OpenAIEmbeddingResult", "OpenAIEmbeddingResultObject", "OpenAIModelInfo", diff --git a/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py index d4f6d247..11986996 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py @@ -11,7 +11,8 @@ @_attrs_define class AudioEmbeddingInput: - """ + """# LEGACY + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. diff --git a/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py index a75a5801..c91d1ff4 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py @@ -11,7 +11,8 @@ @_attrs_define class ImageEmbeddingInput: - """ + """# LEGACY + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. 
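The re-exported per-modality client models each carry their discriminator as a default value, so the serialized payload advertises its modality without extra client logic. A small sketch of the expected serialization, based on the generated `to_dict()`/`from_dict()` shown below (the data URI is a placeholder, and the exact payload shape is an assumption):

```python
# Hedged sketch: default serialization of one of the new client models.
from infinity_client.models import OpenAIEmbeddingInputAudio

body = OpenAIEmbeddingInputAudio(input_=["data:audio/wav;base64,..."])
payload = body.to_dict()
# Expected (assumed) payload: unset fields are omitted, defaults are kept:
# {"input": ["data:audio/wav;base64,..."],
#  "model": "default/not-specified",
#  "infinity_extra_modality": "audio"}
assert payload["infinity_extra_modality"] == "audio"
```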
diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py new file mode 100644 index 00000000..0653500d --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py @@ -0,0 +1,158 @@ +from typing import Any, Dict, List, Type, TypeVar, Union, cast + +from attrs import define as _attrs_define +from attrs import field as _attrs_field + +from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_audio_infinity_extra_modality import ( + OpenAIEmbeddingInputAudioInfinityExtraModality, +) +from ..types import UNSET, Unset + +T = TypeVar("T", bound="OpenAIEmbeddingInputAudio") + + +@_attrs_define +class OpenAIEmbeddingInputAudio: + """ + Attributes: + input_ (Union[List[str], str]): + model (Union[Unset, str]): Default: 'default/not-specified'. + encoding_format (Union[Unset, EmbeddingEncodingFormat]): + user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputAudioInfinityExtraModality]): Default: + OpenAIEmbeddingInputAudioInfinityExtraModality.AUDIO. + """ + + input_: Union[List[str], str] + model: Union[Unset, str] = "default/not-specified" + encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET + user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputAudioInfinityExtraModality + ] = OpenAIEmbeddingInputAudioInfinityExtraModality.AUDIO + additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) + + def to_dict(self) -> Dict[str, Any]: + input_: Union[List[str], str] + if isinstance(self.input_, list): + input_ = [] + for input_type_0_item_data in self.input_: + input_type_0_item: str + input_type_0_item = input_type_0_item_data + input_.append(input_type_0_item) + + else: + input_ = self.input_ + + model = self.model + + encoding_format: Union[Unset, str] = UNSET + if not isinstance(self.encoding_format, Unset): + encoding_format = self.encoding_format.value + + user: Union[None, Unset, str] + if isinstance(self.user, Unset): + user = UNSET + else: + user = self.user + + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + + field_dict: Dict[str, Any] = {} + field_dict.update(self.additional_properties) + field_dict.update( + { + "input": input_, + } + ) + if model is not UNSET: + field_dict["model"] = model + if encoding_format is not UNSET: + field_dict["encoding_format"] = encoding_format + if user is not UNSET: + field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality + + return field_dict + + @classmethod + def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T: + d = src_dict.copy() + + def _parse_input_(data: object) -> Union[List[str], str]: + try: + if not isinstance(data, list): + raise TypeError() + input_type_0 = [] + _input_type_0 = data + for input_type_0_item_data in _input_type_0: + + def _parse_input_type_0_item(data: object) -> str: + return cast(str, data) + + input_type_0_item = _parse_input_type_0_item(input_type_0_item_data) + + input_type_0.append(input_type_0_item) + + return input_type_0 + except: # noqa: E722 + pass + return cast(Union[List[str], str], data) + + input_ = _parse_input_(d.pop("input")) + + model = 
d.pop("model", UNSET) + + _encoding_format = d.pop("encoding_format", UNSET) + encoding_format: Union[Unset, EmbeddingEncodingFormat] + if isinstance(_encoding_format, Unset): + encoding_format = UNSET + else: + encoding_format = EmbeddingEncodingFormat(_encoding_format) + + def _parse_user(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + user = _parse_user(d.pop("user", UNSET)) + + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputAudioInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputAudioInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_audio = cls( + input_=input_, + model=model, + encoding_format=encoding_format, + user=user, + infinity_extra_modality=infinity_extra_modality, + ) + + open_ai_embedding_input_audio.additional_properties = d + return open_ai_embedding_input_audio + + @property + def additional_keys(self) -> List[str]: + return list(self.additional_properties.keys()) + + def __getitem__(self, key: str) -> Any: + return self.additional_properties[key] + + def __setitem__(self, key: str, value: Any) -> None: + self.additional_properties[key] = value + + def __delitem__(self, key: str) -> None: + del self.additional_properties[key] + + def __contains__(self, key: str) -> bool: + return key in self.additional_properties diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py new file mode 100644 index 00000000..257de996 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputAudioInfinityExtraModality(str, Enum): + AUDIO = "audio" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py new file mode 100644 index 00000000..d2b70311 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py @@ -0,0 +1,158 @@ +from typing import Any, Dict, List, Type, TypeVar, Union, cast + +from attrs import define as _attrs_define +from attrs import field as _attrs_field + +from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_image_infinity_extra_modality import ( + OpenAIEmbeddingInputImageInfinityExtraModality, +) +from ..types import UNSET, Unset + +T = TypeVar("T", bound="OpenAIEmbeddingInputImage") + + +@_attrs_define +class OpenAIEmbeddingInputImage: + """ + Attributes: + input_ (Union[List[str], str]): + model (Union[Unset, str]): Default: 'default/not-specified'. + encoding_format (Union[Unset, EmbeddingEncodingFormat]): + user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputImageInfinityExtraModality]): Default: + OpenAIEmbeddingInputImageInfinityExtraModality.IMAGE. 
+ """ + + input_: Union[List[str], str] + model: Union[Unset, str] = "default/not-specified" + encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET + user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputImageInfinityExtraModality + ] = OpenAIEmbeddingInputImageInfinityExtraModality.IMAGE + additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) + + def to_dict(self) -> Dict[str, Any]: + input_: Union[List[str], str] + if isinstance(self.input_, list): + input_ = [] + for input_type_0_item_data in self.input_: + input_type_0_item: str + input_type_0_item = input_type_0_item_data + input_.append(input_type_0_item) + + else: + input_ = self.input_ + + model = self.model + + encoding_format: Union[Unset, str] = UNSET + if not isinstance(self.encoding_format, Unset): + encoding_format = self.encoding_format.value + + user: Union[None, Unset, str] + if isinstance(self.user, Unset): + user = UNSET + else: + user = self.user + + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + + field_dict: Dict[str, Any] = {} + field_dict.update(self.additional_properties) + field_dict.update( + { + "input": input_, + } + ) + if model is not UNSET: + field_dict["model"] = model + if encoding_format is not UNSET: + field_dict["encoding_format"] = encoding_format + if user is not UNSET: + field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality + + return field_dict + + @classmethod + def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T: + d = src_dict.copy() + + def _parse_input_(data: object) -> Union[List[str], str]: + try: + if not isinstance(data, list): + raise TypeError() + input_type_0 = [] + _input_type_0 = data + for input_type_0_item_data in _input_type_0: + + def _parse_input_type_0_item(data: object) -> str: + return cast(str, data) + + input_type_0_item = _parse_input_type_0_item(input_type_0_item_data) + + input_type_0.append(input_type_0_item) + + return input_type_0 + except: # noqa: E722 + pass + return cast(Union[List[str], str], data) + + input_ = _parse_input_(d.pop("input")) + + model = d.pop("model", UNSET) + + _encoding_format = d.pop("encoding_format", UNSET) + encoding_format: Union[Unset, EmbeddingEncodingFormat] + if isinstance(_encoding_format, Unset): + encoding_format = UNSET + else: + encoding_format = EmbeddingEncodingFormat(_encoding_format) + + def _parse_user(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + user = _parse_user(d.pop("user", UNSET)) + + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputImageInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputImageInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_image = cls( + input_=input_, + model=model, + encoding_format=encoding_format, + user=user, + infinity_extra_modality=infinity_extra_modality, + ) + + open_ai_embedding_input_image.additional_properties = d + return open_ai_embedding_input_image + + @property + def additional_keys(self) -> List[str]: + return list(self.additional_properties.keys()) + + def 
__getitem__(self, key: str) -> Any: + return self.additional_properties[key] + + def __setitem__(self, key: str, value: Any) -> None: + self.additional_properties[key] = value + + def __delitem__(self, key: str) -> None: + del self.additional_properties[key] + + def __contains__(self, key: str) -> bool: + return key in self.additional_properties diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py new file mode 100644 index 00000000..20a57588 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputImageInfinityExtraModality(str, Enum): + IMAGE = "image" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py similarity index 70% rename from libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py rename to libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py index 756d91b8..2396a2f8 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py @@ -4,25 +4,32 @@ from attrs import field as _attrs_field from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_text_infinity_extra_modality import OpenAIEmbeddingInputTextInfinityExtraModality from ..types import UNSET, Unset -T = TypeVar("T", bound="OpenAIEmbeddingInput") +T = TypeVar("T", bound="OpenAIEmbeddingInputText") @_attrs_define -class OpenAIEmbeddingInput: - """ +class OpenAIEmbeddingInputText: + """helper + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputTextInfinityExtraModality]): Default: + OpenAIEmbeddingInputTextInfinityExtraModality.TEXT. 
""" input_: Union[List[str], str] model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputTextInfinityExtraModality + ] = OpenAIEmbeddingInputTextInfinityExtraModality.TEXT additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -45,6 +52,10 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -58,6 +69,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality return field_dict @@ -96,15 +109,23 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) - open_ai_embedding_input = cls( + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputTextInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputTextInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_text = cls( input_=input_, model=model, encoding_format=encoding_format, user=user, + infinity_extra_modality=infinity_extra_modality, ) - open_ai_embedding_input.additional_properties = d - return open_ai_embedding_input + open_ai_embedding_input_text.additional_properties = d + return open_ai_embedding_input_text @property def additional_keys(self) -> List[str]: diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py new file mode 100644 index 00000000..2d38fd9e --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputTextInfinityExtraModality(str, Enum): + TEXT = "text" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/poetry.lock b/libs/client_infinity/infinity_client/poetry.lock deleted file mode 100644 index b546f420..00000000 --- a/libs/client_infinity/infinity_client/poetry.lock +++ /dev/null @@ -1,190 +0,0 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
- -[[package]] -name = "anyio" -version = "4.5.0" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.8" -files = [ - {file = "anyio-4.5.0-py3-none-any.whl", hash = "sha256:fdeb095b7cc5a5563175eedd926ec4ae55413bb4be5770c424af0ba46ccb4a78"}, - {file = "anyio-4.5.0.tar.gz", hash = "sha256:c5a275fe5ca0afd788001f58fca1e69e29ce706d746e317d660e21f70c530ef9"}, -] - -[package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} -idna = ">=2.8" -sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} - -[package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] -trio = ["trio (>=0.26.1)"] - -[[package]] -name = "attrs" -version = "24.2.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.7" -files = [ - {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, - {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, -] - -[package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] - -[[package]] -name = "certifi" -version = "2024.8.30" -description = "Python package for providing Mozilla's CA Bundle." 
-optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - -[[package]] -name = "httpcore" -version = "1.0.5" -description = "A minimal low-level HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, -] - -[package.dependencies] -certifi = "*" -h11 = ">=0.13,<0.15" - -[package.extras] -asyncio = ["anyio (>=4.0,<5.0)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] - -[[package]] -name = "httpx" -version = "0.27.2" -description = "The next generation HTTP client." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, - {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, -] - -[package.dependencies] -anyio = "*" -certifi = "*" -httpcore = "==1.*" -idna = "*" -sniffio = "*" - -[package.extras] -brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "idna" -version = "3.10" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.6" -files = [ - {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, - {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, -] - -[package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[metadata] -lock-version = "2.0" -python-versions = "^3.8" -content-hash = "d60bbf780385a47ffdf4a33a182fae5677d1ce66444290d6c2a48ba544f347da" diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index 2952170b..46e56b77 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.59" +version = "0.0.58" description = "A client library for accessing ♾️ Infinity - 
Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/client_infinity/run_generate_with_hook.sh b/libs/client_infinity/run_generate_with_hook.sh new file mode 100755 index 00000000..6c316293 --- /dev/null +++ b/libs/client_infinity/run_generate_with_hook.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -euo pipefail + +# Function to handle cleanup +cleanup() { + echo "Cleaning up..." + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi +} + +# Set up the trap to run the cleanup function on EXIT or any error +trap cleanup EXIT + +# Start infinity_emb in the background +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done + +# Run the tests +cd infinity_client && \ +poetry install && \ +poetry run pip install pytest requests && \ +poetry run python -m pytest ../tests + +# Cleanup will be called due to the trap \ No newline at end of file diff --git a/libs/client_infinity/run_tests_with_hook.sh b/libs/client_infinity/run_tests_with_hook.sh index b42c4a2b..5ea1d773 100755 --- a/libs/client_infinity/run_tests_with_hook.sh +++ b/libs/client_infinity/run_tests_with_hook.sh @@ -1,21 +1,40 @@ #!/bin/bash +set -euo pipefail + # Function to handle cleanup cleanup() { echo "Cleaning up..." - pkill -f infinity_emb + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi } # Set up the trap to run the cleanup function on EXIT or any error trap cleanup EXIT # Start infinity_emb in the background -infinity_emb v2 --log-level error & -echo "infinity_emb started with PID $!" +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done # Run the tests -cd infinity_client && \ -poetry install && \ -poetry run pip install pytest requests && \ -poetry run python -m pytest ../tests +pip install openapi-python-client==0.21.1 + openapi-python-client generate \ + --url http://0.0.0.0:7997/openapi.json \ + --config client_config.yaml \ + --overwrite \ + --custom-template-path=./template + # Cleanup will be called due to the trap \ No newline at end of file diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index edd74370..b9e6faa4 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -3,7 +3,7 @@ # Default target executed when no arguments are given to make. all: help -precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs test +precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs openapi test ###################### # TESTING AND COVERAGE @@ -22,7 +22,7 @@ test tests: poetry run pytest openapi: - wget http://0.0.0.0:7997/openapi.json -O ../../docs/assets/openapi.json + ./../../docs/assets/create_openapi_with_server_hook.sh ###################### # LINTING AND FORMATTING @@ -60,17 +60,7 @@ benchmark_embed: tests/data/benchmark/benchmark_embed.json # Generate CLI v2 documentation cli_v2_docs: - @echo 'Generating CLI v2 documentation...' 
- @echo '# CLI v2 Documentation' > ../../docs/docs/cli_v2.md - @echo >> ../../docs/docs/cli_v2.md - @echo 'The current version of Infinity uses the following arguments in its CLI:' >> ../../docs/docs/cli_v2.md - @echo 'Note: The section below is auto-generated by the makefile.' >> ../../docs/docs/cli_v2.md - @echo >> ../../docs/docs/cli_v2.md - @echo '```bash' >> ../../docs/docs/cli_v2.md - @echo '$ infinity_emb v2 --help' >> ../../docs/docs/cli_v2.md - poetry run infinity_emb v2 --help >> ../../docs/docs/cli_v2.md - @echo '```' >> ../../docs/docs/cli_v2.md - @echo 'CLI v2 documentation generated and saved to ../../docs/docs/cli_v2.md.' + ./../../docs/assets/create_cli_v2_docs.sh ###################### # HELP diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index 2541f44b..53ce192a 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -15,7 +15,7 @@ from infinity_emb.primitives import ClassifyReturnType, EmbeddingReturnType from infinity_emb._optional_imports import CHECK_PYDANTIC -from infinity_emb.primitives import EmbeddingEncodingFormat +from infinity_emb.primitives import EmbeddingEncodingFormat, Modality # potential backwards compatibility to pydantic 1.X # pydantic 2.x is preferred but not strictly needed @@ -23,6 +23,14 @@ from pydantic import BaseModel, Field, conlist try: + from pydantic import ( + BaseModel, + Discriminator, + Field, + RootModel, + Tag, + ) + from .data_uri import DataURI from .pydantic_v2 import ( INPUT_STRING, @@ -51,6 +59,15 @@ class BaseModel: # type: ignore[no-redef] pass + class RootModel: # type: ignore + pass + + class Tag: # type: ignore + pass + + class HttpUrl: # type: ignore + pass + class DataURI: # type: ignore pass @@ -66,7 +83,15 @@ class _Usage(BaseModel): total_tokens: int -class OpenAIEmbeddingInput(BaseModel): +class _OpenAIEmbeddingInput(BaseModel): + model: str = "default/not-specified" + encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float + user: Optional[str] = None + + +class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): + """helper""" + input: Union[ # type: ignore conlist( # type: ignore Annotated[str, INPUT_STRING], @@ -74,12 +99,56 @@ class OpenAIEmbeddingInput(BaseModel): ), Annotated[str, INPUT_STRING], ] - model: str = "default/not-specified" - encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float - user: Optional[str] = None + infinity_extra_modality: Literal[Modality.text] = Modality.text # type: ignore + + +class _OpenAIEmbeddingInput_URI(_OpenAIEmbeddingInput): + """helper""" + + input: Union[ # type: ignore + conlist( # type: ignore + DataURIorURL, + **ITEMS_LIMIT_SMALL, + ), + DataURIorURL, + ] + + +class OpenAIEmbeddingInput_Audio(_OpenAIEmbeddingInput_URI): + infinity_extra_modality: Literal[Modality.audio] = Modality.audio # type: ignore + + +class OpenAIEmbeddingInput_Image(_OpenAIEmbeddingInput_URI): + infinity_extra_modality: Literal[Modality.image] = Modality.image # type: ignore + + +def get_infinity_extra_modality(obj: dict) -> str: + """Resolve the modality of the extra_body.
+ If not present, default to text + + Function name is used to return error message, keep it explicit + """ + try: + return obj.get("infinity_extra_modality", Modality.text.value) + except AttributeError: + # in case a very weird request is sent, validate it against the default + return Modality.text.value + + +class MultiModalOpenAIEmbedding(RootModel): + root: Annotated[ + Union[ + Annotated[_OpenAIEmbeddingInput_Text, Tag(Modality.text.value)], + Annotated[OpenAIEmbeddingInput_Audio, Tag(Modality.audio.value)], + Annotated[OpenAIEmbeddingInput_Image, Tag(Modality.image.value)], + ], + Discriminator(get_infinity_extra_modality), + ] class ImageEmbeddingInput(BaseModel): + """LEGACY, NO LONGER UPDATED""" + input: Union[ # type: ignore conlist( # type: ignore DataURIorURL, @@ -93,6 +162,8 @@ class ImageEmbeddingInput(BaseModel): class AudioEmbeddingInput(ImageEmbeddingInput): + """LEGACY, NO LONGER UPDATED""" + pass @@ -118,9 +189,10 @@ def to_embeddings_response( encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float, ) -> dict[str, Union[str, list[dict], dict]]: if encoding_format == EmbeddingEncodingFormat.base64: - assert ( - not engine_args.embedding_dtype.uses_bitpacking() - ), f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}" + if engine_args.embedding_dtype.uses_bitpacking(): + raise ValueError( + f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}" + ) embeddings = [base64.b64encode(np.frombuffer(emb.astype(np.float32), dtype=np.float32)) for emb in embeddings] # type: ignore return dict( diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 427fceba..507bac88 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -138,8 +138,8 @@ async def embed( """ if "embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `embed`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `embed`. " + f"Options are {self.model_worker.capabilities}." ) input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] @@ -166,8 +166,8 @@ async def rerank( """ if "rerank" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `rerank`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `rerank`. " + f"Options are {self.model_worker.capabilities}." ) rerankables = [ReRankSingle(query=query, document=doc) for doc in docs] scores, usage = await self._schedule(rerankables) @@ -197,8 +197,8 @@ async def classify( """ if "classify" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `classify`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `classify`. " + f"Options are {self.model_worker.capabilities}." ) items = [PredictSingle(sentence=s) for s in sentences] classifications, usage = await self._schedule(items) @@ -230,8 +230,8 @@ async def image_embed( if "image_embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `image_embed`." - f"options are {self.model_worker.capabilities}."
+ "the loaded model cannot fulfill `image_embed`. " + f"Options are {self.model_worker.capabilities}." ) items = await resolve_images(images) @@ -259,8 +259,8 @@ async def audio_embed( if "audio_embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `audio_embed`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `audio_embed`. " + f"Options are {self.model_worker.capabilities}." ) items = await resolve_audios( diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 512d3096..405ff4ab 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -21,7 +21,7 @@ ClassifyResult, DataURIorURL, ImageEmbeddingInput, - OpenAIEmbeddingInput, + MultiModalOpenAIEmbedding, OpenAIEmbeddingResult, OpenAIModelInfo, RerankInput, @@ -35,6 +35,7 @@ EmbeddingDtype, ImageCorruption, InferenceEngine, + Modality, ModelNotDeployedError, PoolingMethod, ) @@ -224,25 +225,116 @@ def _resolve_mixed_input( dependencies=route_dependencies, operation_id="embeddings", ) - async def _embeddings(data: OpenAIEmbeddingInput): - """Encode Embeddings + async def _embeddings(data: MultiModalOpenAIEmbedding): + """Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post("http://..:7997/embeddings", - json={"model":"BAAI/bge-small-en-v1.5","input":["A sentence to encode."]}) + json={"model":"openai/clip-vit-base-patch32","input":["Two cute cats."]}) + ``` + + ## Running Image Embeddings + ```python + requests.post("http://..:7997/embeddings", + json={ + "model": "openai/clip-vit-base-patch32", + "encoding_format": "base64", + "input": [ + "http://images.cocodataset.org/val2017/000000039769.jpg", + # can also be base64 encoded + ], + # set extra modality to image to process as image + "infinity_extra_modality": "image" + } + ) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + + def url_to_base64(url, modality = "image"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f"{modality}/{url.split('.')[-1]}" + return f"data:{mimetype};base64,{base64_encoded}" + + requests.post("http://localhost:7997/embeddings", + json={ + "model": "laion/larger_clap_general", + "encoding_format": "float", + "input": [ + url, url_to_base64(url, "audio") + ], + # set extra modality to audio to process as audio + "infinity_extra_modality": "audio" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url="http://localhost:7997/") + client.embeddings.create( + model="laion/larger_clap_general", + input=[url_to_base64(url, "audio")], + encoding_format= "base64", + extra_body={ + "infinity_extra_modality": "audio" + } + ) + + client.embeddings.create( + model="laion/larger_clap_general", + input=["the sound of a beep", "the sound of a cat"], + encoding_format= "base64", + extra_body={ + "infinity_extra_modality": "text" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2
--model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general ``` """ - engine = _resolve_engine(data.model) - try: - if isinstance(data.input, str): - data.input = [data.input] + modality = data.root.infinity_extra_modality + data_root = data.root + engine = _resolve_engine(data_root.model) - logger.debug("[📝] Received request with %s inputs ", len(data.input)) + try: start = time.perf_counter() - - embedding, usage = await engine.embed(sentences=data.input) + if modality == Modality.text: + if isinstance(data_root.input, str): + input_ = [data_root.input] + else: + input_ = data_root.input # type: ignore + logger.debug( + "[📝] Received request with %s input texts ", + len(input_), # type: ignore + ) + embedding, usage = await engine.embed(sentences=input_) + elif modality == Modality.audio: + urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore + logger.debug( + "[📝] Received request with %s input audios ", + len(urls_or_bytes), # type: ignore + ) + embedding, usage = await engine.audio_embed(audios=urls_or_bytes) + elif modality == Modality.image: + urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore + logger.debug( + "[📝] Received request with %s input images ", + len(urls_or_bytes), # type: ignore + ) + embedding, usage = await engine.image_embed(images=urls_or_bytes) duration = (time.perf_counter() - start) * 1000 logger.debug("[✅] Done in %s ms", duration) @@ -250,12 +342,22 @@ async def _embeddings(data: OpenAIEmbeddingInput): return OpenAIEmbeddingResult.to_embeddings_response( embeddings=embedding, engine_args=engine.engine_args, - encoding_format=data.encoding_format, + encoding_format=data_root.encoding_format, usage=usage, ) except ModelNotDeployedError as ex: raise errors.OpenAIException( - f"ModelNotDeployedError: model=`{data.model}` does not support `embed`. Reason: {ex}", + f"ModelNotDeployedError: model=`{data_root.model}` does not support `embed` for modality `{modality.value}`. Reason: {ex}", + code=status.HTTP_400_BAD_REQUEST, + ) + except (ImageCorruption, AudioCorruption) as ex: + # get urls_or_bytes if not defined + try: + urls_or_bytes = urls_or_bytes + except NameError: + urls_or_bytes = [] + raise errors.OpenAIException( + f"{modality.value}Corruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except Exception as ex: @@ -368,6 +470,8 @@ async def _classify(data: ClassifyInput): response_class=responses.ORJSONResponse, dependencies=route_dependencies, operation_id="embeddings_image", + deprecated=True, + summary="Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image`", ) async def _embeddings_image(data: ImageEmbeddingInput): """Encode Embeddings from Image files @@ -425,6 +529,8 @@ async def _embeddings_image(data: ImageEmbeddingInput): response_class=responses.ORJSONResponse, dependencies=route_dependencies, operation_id="embeddings_audio", + deprecated=True, + summary="Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio`", ) async def _embeddings_audio(data: AudioEmbeddingInput): """Encode Embeddings from Audio files @@ -799,7 +905,7 @@ def v2( def cli(): if len(sys.argv) == 1 or sys.argv[1] not in ["v1", "v2", "help", "--help"]: - for _ in range(9): + for _ in range(3): logger.error( "Error: No command given. Defaulting to `v1`. 
" "Relying on this side effect is considered an error and " diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py index 890f4800..00fe8682 100644 --- a/libs/infinity_emb/infinity_emb/primitives.py +++ b/libs/infinity_emb/infinity_emb/primitives.py @@ -402,3 +402,9 @@ class AudioCorruption(Exception): ModelCapabilites = Literal["embed", "rerank", "classify", "image_embed", "audio_embed"] + + +class Modality(str, enum.Enum): + text = "text" + audio = "audio" + image = "image" diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py index 999fea45..6dac01d6 100644 --- a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py @@ -24,14 +24,16 @@ async def resolve_audio( try: audio_bytes = io.BytesIO(audio) except Exception as e: - raise AudioCorruption(f"Error opening audio: {e}") + raise AudioCorruption(f"Error opening audio from bytes: {e}") else: try: downloaded = await (await session.get(audio)).read() - # downloaded = requests.get(audio, stream=True).content + # audio_bytes = io.BytesIO(downloaded) except Exception as e: - raise AudioCorruption(f"Error downloading audio.\nError msg: {str(e)}") + raise AudioCorruption( + f"Error downloading audio from {audio}. \nError msg: {str(e)}" + ) try: data, rate = sf.read(audio_bytes) diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index 869244bf..331bfcba 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -695,6 +695,17 @@ files = [ {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "einops" version = "0.8.0" @@ -757,22 +768,23 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.110.2" +version = "0.115.0" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.110.2-py3-none-any.whl", hash = "sha256:239403f2c0a3dda07a9420f95157a7f014ddb2b770acdbc984f9bdf3ead7afdb"}, - {file = "fastapi-0.110.2.tar.gz", hash = "sha256:b53d673652da3b65e8cd787ad214ec0fe303cad00d2b529b86ce7db13f17518d"}, + {file = "fastapi-0.115.0-py3-none-any.whl", hash = "sha256:17ea427674467486e997206a5ab25760f6b09e069f099b96f5b55a32fb6f1631"}, + {file = "fastapi-0.115.0.tar.gz", hash = "sha256:f93b4ca3529a8ebc6fc3fcf710e5efa8de3df9b41570958abf1d97d843138004"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.37.2,<0.38.0" +starlette = ">=0.37.2,<0.39.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", 
"uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "filelock" @@ -1250,6 +1262,76 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiter" +version = "0.5.0" +description = "Fast iterable JSON parser." +optional = false +python-versions = ">=3.8" +files = [ + {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"}, + {file = "jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e"}, + {file = "jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf"}, + {file = "jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9"}, + {file = 
"jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646"}, + {file = "jiter-0.5.0-cp311-none-win32.whl", hash = "sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb"}, + {file = "jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338"}, + {file = "jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4"}, + {file = "jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f04bc2fc50dc77be9d10f73fcc4e39346402ffe21726ff41028f36e179b587e6"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f433a4169ad22fcb550b11179bb2b4fd405de9b982601914ef448390b2954f3"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad4a6398c85d3a20067e6c69890ca01f68659da94d74c800298581724e426c7e"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6baa88334e7af3f4d7a5c66c3a63808e5efbc3698a1c57626541ddd22f8e4fbf"}, + {file = 
"jiter-0.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ece0a115c05efca597c6d938f88c9357c843f8c245dbbb53361a1c01afd7148"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:335942557162ad372cc367ffaf93217117401bf930483b4b3ebdb1223dbddfa7"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649b0ee97a6e6da174bffcb3c8c051a5935d7d4f2f52ea1583b5b3e7822fbf14"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4be354c5de82157886ca7f5925dbda369b77344b4b4adf2723079715f823989"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5206144578831a6de278a38896864ded4ed96af66e1e63ec5dd7f4a1fce38a3a"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8120c60f8121ac3d6f072b97ef0e71770cc72b3c23084c72c4189428b1b1d3b6"}, + {file = "jiter-0.5.0-cp38-none-win32.whl", hash = "sha256:6f1223f88b6d76b519cb033a4d3687ca157c272ec5d6015c322fc5b3074d8a5e"}, + {file = "jiter-0.5.0-cp38-none-win_amd64.whl", hash = "sha256:c59614b225d9f434ea8fc0d0bec51ef5fa8c83679afedc0433905994fb36d631"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0af3838cfb7e6afee3f00dc66fa24695199e20ba87df26e942820345b0afc566"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:550b11d669600dbc342364fd4adbe987f14d0bbedaf06feb1b983383dcc4b961"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:489875bf1a0ffb3cb38a727b01e6673f0f2e395b2aad3c9387f94187cb214bbf"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b250ca2594f5599ca82ba7e68785a669b352156260c5362ea1b4e04a0f3e2389"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ea18e01f785c6667ca15407cd6dabbe029d77474d53595a189bdc813347218e"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462a52be85b53cd9bffd94e2d788a09984274fe6cebb893d6287e1c296d50653"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cc68b48d50fa472c79c93965e19bd48f40f207cb557a8346daa020d6ba973b"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c834133e59a8521bc87ebcad773608c6fa6ab5c7a022df24a45030826cf10bc"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab3a71ff31cf2d45cb216dc37af522d335211f3a972d2fe14ea99073de6cb104"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cccd3af9c48ac500c95e1bcbc498020c87e1781ff0345dd371462d67b76643eb"}, + {file = "jiter-0.5.0-cp39-none-win32.whl", hash = "sha256:368084d8d5c4fc40ff7c3cc513c4f73e02c85f6009217922d0823a48ee7adf61"}, + {file = "jiter-0.5.0-cp39-none-win_amd64.whl", hash = "sha256:ce03f7b4129eb72f1687fa11300fbf677b02990618428934662406d2a76742a1"}, + {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, +] + [[package]] name = "joblib" version = "1.4.2" @@ -2061,6 +2143,30 @@ packaging = "*" protobuf = "*" sympy = "*" +[[package]] +name = "openai" +version = "1.51.0" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"}, + {file = 
"openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.11,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + [[package]] name = "optimum" version = "1.22.0" @@ -3359,7 +3465,7 @@ files = [ name = "setuptools" version = "75.1.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, @@ -3660,31 +3766,31 @@ files = [ [[package]] name = "torch" -version = "2.4.0" +version = "2.4.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" files = [ - {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, - {file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"}, - {file = "torch-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2497cbc7b3c951d69b276ca51fe01c2865db67040ac67f5fc20b03e41d16ea4a"}, - {file = "torch-2.4.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:685418ab93730efbee71528821ff54005596970dd497bf03c89204fb7e3f71de"}, - {file = "torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e743adadd8c8152bb8373543964551a7cb7cc20ba898dc8f9c0cdbe47c283de0"}, - {file = "torch-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7334325c0292cbd5c2eac085f449bf57d3690932eac37027e193ba775703c9e6"}, - {file = "torch-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:97730014da4c57ffacb3c09298c6ce05400606e890bd7a05008d13dd086e46b1"}, - {file = "torch-2.4.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f169b4ea6dc93b3a33319611fcc47dc1406e4dd539844dcbd2dec4c1b96e166d"}, - {file = "torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:997084a0f9784d2a89095a6dc67c7925e21bf25dea0b3d069b41195016ccfcbb"}, - {file = "torch-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc3988e8b36d1e8b998d143255d9408d8c75da4ab6dd0dcfd23b623dfb0f0f57"}, - {file = "torch-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:3374128bbf7e62cdaed6c237bfd39809fbcfaa576bee91e904706840c3f2195c"}, - {file = "torch-2.4.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:91aaf00bfe1ffa44dc5b52809d9a95129fca10212eca3ac26420eb11727c6288"}, - {file = "torch-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc30457ea5489c62747d3306438af00c606b509d78822a88f804202ba63111ed"}, - {file = "torch-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a046491aaf96d1215e65e1fa85911ef2ded6d49ea34c8df4d0638879f2402eef"}, - {file = "torch-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:688eec9240f3ce775f22e1e1a5ab9894f3d5fe60f3f586deb7dbd23a46a83916"}, - {file = "torch-2.4.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:3af4de2a618fb065e78404c4ba27a818a7b7957eaeff28c6c66ce7fb504b68b8"}, - {file = "torch-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:618808d3f610d5f180e47a697d4ec90b810953bb1e020f424b2ac7fb0884b545"}, - {file = "torch-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = 
"sha256:ed765d232d23566052ba83632ec73a4fccde00b4c94ad45d63b471b09d63b7a7"}, - {file = "torch-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2feb98ac470109472fb10dfef38622a7ee08482a16c357863ebc7bc7db7c8f7"}, - {file = "torch-2.4.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8940fc8b97a4c61fdb5d46a368f21f4a3a562a17879e932eb51a5ec62310cb31"}, + {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"}, + {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"}, + {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"}, + {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"}, + {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"}, + {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"}, + {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"}, + {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"}, + {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"}, + {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"}, + {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"}, + {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"}, + {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"}, + {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"}, + {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"}, + {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"}, + {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"}, + {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"}, + {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"}, + {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"}, ] [package.dependencies] @@ -3703,6 +3809,7 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \" nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = 
"12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = "*" sympy = "*" triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} typing-extensions = ">=4.8.0" @@ -3713,42 +3820,37 @@ optree = ["optree (>=0.11.0)"] [[package]] name = "torchvision" -version = "0.19.0" +version = "0.19.1" description = "image and video datasets and models for torch deep learning" optional = true python-versions = ">=3.8" files = [ - {file = "torchvision-0.19.0-1-cp310-cp310-win_amd64.whl", hash = "sha256:6ed066aae5c50465d7c4761357aefe5dbd2eb7075a33ab8c14b352fc2353ad4c"}, - {file = "torchvision-0.19.0-1-cp311-cp311-win_amd64.whl", hash = "sha256:6b1bce2e4c003d890a18f14ff289528707d918e38539ff890ef02aa31dae1b56"}, - {file = "torchvision-0.19.0-1-cp312-cp312-win_amd64.whl", hash = "sha256:13aee7a46e049c8c1e7d35a0394b0587a7e62ff3d1a822cd2bbbacb675ac4a09"}, - {file = "torchvision-0.19.0-1-cp38-cp38-win_amd64.whl", hash = "sha256:2acc436d043d4f81b3bc6929cbfa4ef1cdae4d8a0b04ec72ec30a497e9a38179"}, - {file = "torchvision-0.19.0-1-cp39-cp39-win_amd64.whl", hash = "sha256:b5f70f5a8bd9c8b00a076bf466b39b5cd679ef62587c47cc048adb04d9c5f155"}, - {file = "torchvision-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec874ef85dcb24c69e600f6e276af892c80cde3ffdaeb7275efda463242bc2a8"}, - {file = "torchvision-0.19.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:106842b1e475b14d9a04ee0d6f5477d43100e3bb78e9d31e37422384d0d84179"}, - {file = "torchvision-0.19.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d467d434005fd05a227a2ba7af4c591bb67e6d4a97bbd06eda8da83f43e9fd07"}, - {file = "torchvision-0.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:f77ac31f7337d0f6f4b58e65582c6c93b9d9eeec7dfd7478896b5cdc19a2d60d"}, - {file = "torchvision-0.19.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dbf3aa71a3899244fc884303ed3c4604a160824fefac77e82317a5463efc1d9b"}, - {file = "torchvision-0.19.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ec4162dc71d9db7f0b51d0f92491929c1419605ff436e1305e50de13504a1c30"}, - {file = "torchvision-0.19.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:4e6aa4fa3f0bc3599fa071c149e651a3e6bdd67c9161794478f9f91471c406a2"}, - {file = "torchvision-0.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac5525d5cc09e425b5cf5752ecf66eefbbbd8c8cd945198ce35eb01a694e6069"}, - {file = "torchvision-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c09ef8ed184fa877f6251b620226e74f682b8f1d6b341456428d4955b8d9c670"}, - {file = "torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:02f1dd5cfc897957535b41b0258ec452d30de044e20c2de2c75869f7708e7656"}, - {file = "torchvision-0.19.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:be0f27a28b8e9f2ae98a31af34a4bdd2a5bf154d92bd73a5797c8d2156fb3ab6"}, - {file = "torchvision-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6ba7756f75c80212e51d3576f85ea204589e0c16efdb9b835dd677bc8929a67"}, - {file = "torchvision-0.19.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:854e967a16a9409e941b5bbe5aa357b23f7158bccb9de35ae20fd4945f05ecd1"}, - {file = "torchvision-0.19.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d9afb8a3c3ce99a161a64c2a3b91cb545632a72118053cbfb84e87a02a8dcd02"}, - {file = "torchvision-0.19.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:079a696e0b2cb52e4be30afa8e9b3d7d280f02a2b5ffedd7e821fa1efd1a5a8d"}, - {file = "torchvision-0.19.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:aaa338ff3a55a8c0f94e0e64eff6fe2af1fc933a95fd43812760e72ea66e986b"}, - {file = "torchvision-0.19.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd1279571d4b68d5a53d9b7a35aedf91c4cb1e0b08099f6a1effa7b25b8c95e7"}, - {file = "torchvision-0.19.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d54b5e19b7ebebca7d0b08497b4c6335264cad04c94c05fa35988d9e9eed0c4"}, - {file = "torchvision-0.19.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5f9a598dcf82bdfc8e4436ce74763b3877dabec3b33f94613b94ede13e3e4dee"}, - {file = "torchvision-0.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:ec1281c10402234d470bfd4d53663d81f4364f293b2f8fe24d4a7a1adc78c90c"}, + {file = "torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04"}, + {file = "torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145"}, + {file = "torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8"}, + {file = "torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c"}, + {file = "torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9"}, + {file = "torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289"}, + {file = "torchvision-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4e4f5b24ea6b087b02ed492ab1e21bba3352c4577e2def14248cfc60732338"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9281d63ead929bb19143731154cd1d8bf0b5e9873dff8578a40e90a6bec3c6fa"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4d10bc9083c4d5fadd7edd7b729700a7be48dab4f62278df3bc73fa48e48a155"}, + {file = "torchvision-0.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ccf085ef1824fb9e16f1901285bf89c298c62dfd93267a39e8ee42c71255242f"}, + {file = "torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5"}, + {file = "torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614"}, ] [package.dependencies] numpy = "*" pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" -torch = "2.4.0" +torch = "2.4.1" [package.extras] gdown = ["gdown (>=4.7.3)"] @@ -3876,6 +3978,11 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, + {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, + {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, + {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, + {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, + {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -4661,4 +4768,4 @@ vision = ["pillow", "timm"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "81ba6bb614937cb06c75c15232bbba7af3797ec8e0936723411fad07bf211a48" +content-hash = "806fe858fc679b8842cb847d213b8607a887cdfe58c28bab1def8bfde0fbda05" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 55d9b61d..f9ba99cd 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -56,11 +56,12 @@ coverage = {extras = ["toml"], version = "^7.3.2"} mypy = "^1.5.1" requests = "2.28.1" types-requests = "2.28.1" +openai = "*" # 1.51.0 works # preferred dev dependencies -torch = "2.4.0" +torch = "2.4.1" prometheus-fastapi-instrumentator = "7.0.0" -fastapi = "0.110.2" +fastapi = "0.115.0" [tool.poetry.group.codespell.dependencies] codespell = "^2.2.0" diff --git a/libs/infinity_emb/tests/conftest.py b/libs/infinity_emb/tests/conftest.py index 9705161a..121f4b72 100644 --- a/libs/infinity_emb/tests/conftest.py +++ b/libs/infinity_emb/tests/conftest.py @@ -10,7 +10,7 @@ pytest.DEFAULT_RERANKER_MODEL = "mixedbread-ai/mxbai-rerank-xsmall-v1" pytest.DEFAULT_CLASSIFIER_MODEL = "SamLowe/roberta-base-go_emotions" pytest.DEFAULT_AUDIO_MODEL = "laion/clap-htsat-unfused" -pytest.DEFAULT_VISION_MODEL = "wkcn/TinyCLIP-ViT-8M-16-Text-3M-YFCC15M" +pytest.DEFAULT_IMAGE_MODEL = "wkcn/TinyCLIP-ViT-8M-16-Text-3M-YFCC15M" pytest.IMAGE_SAMPLE_URL = "https://github.com/michaelfeil/infinity/raw/06fd1f4d8f0a869f4482fc1c78b62a75ccbb66a1/docs/assets/cats_coco_sample.jpg" pytest.AUDIO_SAMPLE_URL = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" diff --git a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py index 99fc45c9..d6362958 100644 --- 
a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py +++ b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py @@ -6,7 +6,6 @@ import sys import time from unittest import TestCase -from uuid import uuid4 import numpy as np import pytest @@ -18,8 +17,8 @@ from infinity_emb.primitives import InferenceEngine PREFIX = "" -MODEL_NAME = str(uuid4()) -MODEL_NAME_2 = str(uuid4()) +MODEL_NAME = "dummy-number-1" +MODEL_NAME_2 = "dummy-number-2" BATCH_SIZE = 16 PATH_OPENAPI = pathlib.Path(__file__).parent.parent.parent.parent.parent.joinpath( @@ -67,22 +66,22 @@ async def test_model_route(client): assert "unix" in respnse_health.json() -@pytest.mark.parametrize("model_name", [MODEL_NAME, MODEL_NAME_2]) @pytest.mark.anyio -async def test_embedding_max_length(client, model_name): +async def test_embedding_max_length(client): # TOO long - input = "%_" * 4097 * 15 - response = await client.post( - f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) - ) - assert response.status_code == 422, f"{response.status_code}, {response.text}" - # works - input = "%_" * 4096 * 15 - response = await client.post( - f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) - ) - assert response.status_code == 200, f"{response.status_code}, {response.text}" - assert response.json()["model"] == model_name + for model_name in [MODEL_NAME, MODEL_NAME_2]: + input = "%_" * 4097 * 15 + response = await client.post( + f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) + ) + assert response.status_code == 422, f"{response.status_code}, {response.text}" + # works + input = "%_" * 4096 * 15 + response = await client.post( + f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) + ) + assert response.status_code == 200, f"{response.status_code}, {response.text}" + assert response.json()["model"] == model_name @pytest.mark.parametrize("model_name", [MODEL_NAME]) diff --git a/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py new file mode 100644 index 00000000..0702f1ad --- /dev/null +++ b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py @@ -0,0 +1,160 @@ +# type: ignore + +import base64 + +import numpy as np +import pytest +import requests +from asgi_lifespan import LifespanManager +from httpx import AsyncClient +from openai import APIConnectionError, AsyncOpenAI +from sentence_transformers import SentenceTransformer # type: ignore + +from infinity_emb import create_server +from infinity_emb.args import EngineArgs + +PREFIX = "" +MODEL: str = ( + "michaelfeil/bge-small-en-v1.5" # pytest.DEFAULT_BERT_MODEL # type: ignore +) +baseurl = "http://openaidemo" +batch_size = 8 + +app = create_server( + url_prefix=PREFIX, + engine_args_list=[ + EngineArgs( + model_name_or_path=pytest.DEFAULT_AUDIO_MODEL, + batch_size=batch_size, + ), + EngineArgs( + model_name_or_path=pytest.DEFAULT_IMAGE_MODEL, + batch_size=batch_size, + ), + EngineArgs( + model_name_or_path=pytest.DEFAULT_BERT_MODEL, + batch_size=batch_size, + ), + ], + api_key="some_dummy_key", +) + + +@pytest.fixture +def model_base() -> SentenceTransformer: + return SentenceTransformer(MODEL) + + +@pytest.fixture() +async def client(): + async with AsyncClient( + app=app, base_url=baseurl, timeout=20 + ) as client, LifespanManager(app): + yield client + + +def url_to_base64(url, modality="image"): + """small helper to convert url to base64 without server requiring access to the url""" + response = requests.get(url) + 
response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode("utf-8") + mimetype = f"{modality}/{url.split('.')[-1]}" + return f"data:{mimetype};base64,{base64_encoded}" + + +@pytest.mark.anyio +async def test_openai(client: AsyncClient): + client_oai = AsyncOpenAI( + api_key="some_dummy_key", base_url=baseurl, http_client=client + ) + + async with client_oai: + # test audio + emb1_audio_from_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[ + "the sound of a beep", + "the sound of a cat", + "the sound of a dog", + "the sound of a bird", + ], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + emb1_audio = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[url_to_base64(pytest.AUDIO_SAMPLE_URL, "audio")], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) + emb1_1_audio = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[pytest.AUDIO_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) + # test: image + emb_1_image_from_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=["a cat", "a dog", "a bird"], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + emb_1_image = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=[url_to_base64(pytest.IMAGE_SAMPLE_URL, "image")], # image is a cat + encoding_format="float", + extra_body={"infinity_extra_modality": "image"}, + ) + emb_1_1_image = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=[pytest.IMAGE_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "image"}, + ) + + # test: text + emb_1_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_BERT_MODEL, + input=["a cat", "a cat", "a bird"], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + + # test AUDIO: cosine distance of beep to cat and dog + np.testing.assert_allclose( + emb1_audio.data[0].embedding, emb1_1_audio.data[0].embedding, rtol=1e-5 + ) + assert all( + np.dot(emb1_audio.data[0].embedding, emb1_audio_from_text.data[0].embedding) + > np.dot(emb1_audio.data[0].embedding, emb1_audio_from_text.data[i].embedding) + for i in range(1, 4) + ) + + # test IMAGE: cosine distance of cat to dog and bird + np.testing.assert_allclose( + emb_1_image.data[0].embedding, emb_1_1_image.data[0].embedding, rtol=1e-5 + ) + assert all( + np.dot(emb_1_image.data[0].embedding, emb_1_image_from_text.data[0].embedding) + > np.dot(emb_1_image.data[0].embedding, emb_1_image_from_text.data[i].embedding) + for i in range(1, 3) + ) + + # test TEXT: cosine distance of cat to dog and bird + np.testing.assert_allclose( + emb_1_text.data[0].embedding, emb_1_text.data[1].embedding, rtol=1e-5 + ) + + # wrong key + with pytest.raises(APIConnectionError): + client_oai = AsyncOpenAI( + api_key="some_wrong", base_url=baseurl, http_client=client + ) + async with client_oai: + await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[pytest.AUDIO_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) diff --git a/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py b/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py index 18f71d61..a93ffbb9 100644 --- a/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py +++ 
b/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py @@ -8,9 +8,7 @@ from infinity_emb.primitives import Device, InferenceEngine PREFIX = "/v1_optimum" -MODEL: str = ( - "michaelfeil/bge-small-en-v1.5" # pytest.DEFAULT_BERT_MODEL # type: ignore -) +MODEL: str = pytest.DEFAULT_BERT_MODEL # type: ignore batch_size = 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py b/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py index a1a86974..5170b347 100644 --- a/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py +++ b/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_sentence_transformers" MODEL: str = pytest.DEFAULT_BERT_MODEL # type: ignore[assignment] batch_size = 64 if torch.cuda.is_available() else 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 120e7c63..9d4b5a82 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_audio" MODEL: str = pytest.DEFAULT_AUDIO_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 @@ -116,35 +116,45 @@ async def test_meta(client, helpers): @pytest.mark.anyio -@pytest.mark.parametrize("no_of_audios", [1, 5, 10]) -async def test_audio_multiple(client, no_of_audios): - audio_urls = [ - "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" - ] * no_of_audios - - response = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_urls}, - ) - assert response.status_code == 200 - rdata = response.json() - rdata_results = rdata["data"] - assert len(rdata_results) == no_of_audios - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 +async def test_audio_multiple(client): + for route in [f"{PREFIX}/embeddings_audio", f"{PREFIX}/embeddings"]: + for no_of_audios in [1, 5, 10]: + audio_urls = [ + "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + ] * no_of_audios + + response = await client.post( + route, + json={ + "model": MODEL, + "input": audio_urls, + "infinity_extra_modality": "audio", + }, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert len(rdata_results) == no_of_audios + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio async def test_audio_fail(client): - audio_url = "https://www.google.com/404" - - response = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_url}, - ) - assert response.status_code == status.HTTP_400_BAD_REQUEST + for route in [f"{PREFIX}/embeddings_audio", f"{PREFIX}/embeddings"]: + audio_url = "https://www.google.com/404" + + response = await client.post( + route, + json={ + "model": MODEL, + "input": audio_url, + "infinity_extra_modality": "audio", + }, + ) + assert 
response.status_code == status.HTTP_400_BAD_REQUEST @pytest.mark.anyio @@ -152,8 +162,12 @@ async def test_audio_empty(client): audio_url_empty = [] response_empty = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_url_empty}, + f"{PREFIX}/embeddings", + json={ + "model": MODEL, + "input": audio_url_empty, + "infinity_extra_modality": "audio", + }, ) assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py b/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py index 3b53aadf..fe1667b0 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_reranker" MODEL: str = pytest.DEFAULT_RERANKER_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py index 0ca79034..0735b61e 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -13,7 +13,7 @@ from infinity_emb.primitives import Device, InferenceEngine PREFIX = "/v1_vision" -MODEL: str = pytest.DEFAULT_VISION_MODEL # type: ignore[assignment] +MODEL: str = pytest.DEFAULT_IMAGE_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 app = create_server( @@ -113,60 +113,70 @@ async def test_vision_base64(client): @pytest.mark.anyio async def test_meta(client, helpers): - image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" - - text_input = ["a cat", "a car", "a fridge"] - image_input = [image_url] - response_text = await client.post( - f"{PREFIX}/embeddings", - json={"model": MODEL, "input": text_input}, - ) - response_image = await client.post( - f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": image_input}, - ) + for route in [f"{PREFIX}/embeddings_image", f"{PREFIX}/embeddings"]: + image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" + + text_input = ["a cat", "a car", "a fridge"] + image_input = [image_url] + response_text = await client.post( + f"{PREFIX}/embeddings", + json={"model": MODEL, "input": text_input}, + ) + response_image = await client.post( + route, + json={ + "model": MODEL, + "input": image_input, + "infinity_extra_modality": "image", + }, + ) - assert response_text.status_code == 200 - assert response_image.status_code == 200 + assert response_text.status_code == 200 + assert response_image.status_code == 200 - rdata_text = response_text.json() - rdata_results_text = rdata_text["data"] + rdata_text = response_text.json() + rdata_results_text = rdata_text["data"] - rdata_image = response_image.json() - rdata_results_image = rdata_image["data"] + rdata_image = response_image.json() + rdata_results_image = rdata_image["data"] - embeddings_image_cat = rdata_results_image[0]["embedding"] - embeddings_text_cat = rdata_results_text[0]["embedding"] - embeddings_text_car = rdata_results_text[1]["embedding"] - embeddings_text_fridge = rdata_results_text[2]["embedding"] + embeddings_image_cat = rdata_results_image[0]["embedding"] + embeddings_text_cat = rdata_results_text[0]["embedding"] + embeddings_text_car = rdata_results_text[1]["embedding"] + embeddings_text_fridge = 
rdata_results_text[2]["embedding"] - assert helpers.cosine_similarity( - embeddings_image_cat, embeddings_text_cat - ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_car) - assert helpers.cosine_similarity( - embeddings_image_cat, embeddings_text_cat - ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_fridge) + assert helpers.cosine_similarity( + embeddings_image_cat, embeddings_text_cat + ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_car) + assert helpers.cosine_similarity( + embeddings_image_cat, embeddings_text_cat + ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_fridge) @pytest.mark.anyio -@pytest.mark.parametrize("no_of_images", [1, 5, 10]) -async def test_vision_multiple(client, no_of_images): - image_urls = [ - pytest.IMAGE_SAMPLE_URL, - ] * no_of_images - - response = await client.post( - f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": image_urls}, - ) - assert response.status_code == 200 - rdata = response.json() - rdata_results = rdata["data"] - assert len(rdata_results) == no_of_images - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 +async def test_vision_multiple(client): + for route in [f"{PREFIX}/embeddings_image", f"{PREFIX}/embeddings"]: + for no_of_images in [1, 5, 10]: + image_urls = [ + pytest.IMAGE_SAMPLE_URL, + ] * no_of_images + + response = await client.post( + route, + json={ + "model": MODEL, + "input": image_urls, + "infinity_extra_modality": "image", + }, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert len(rdata_results) == no_of_images + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio