From 2ff39c6f1bc74101f0dec03308a674a930a64873 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Fri, 4 Oct 2024 23:33:14 -0700 Subject: [PATCH] Embed openai broad multimodal compat (#395) * initial commit * add openapi * fix openapi * add poetry lock * reiterate on Modality dtype * update Makefile and run_test.sh scripts * reiterate on CLI --- docs/assets/create_cli_v2_docs.sh | 33 ++ .../assets/create_openapi_with_server_hook.sh | 35 ++ docs/assets/openapi.json | 2 +- docs/docs/cli_v2.md | 186 ++++++++-- libs/client_infinity/Makefile | 7 +- .../infinity_client/api/default/embeddings.py | 346 ++++++++++++++++-- .../api/default/embeddings_audio.py | 16 +- .../api/default/embeddings_image.py | 16 +- .../infinity_client/models/__init__.py | 14 +- .../models/audio_embedding_input.py | 3 +- .../models/image_embedding_input.py | 3 +- .../models/open_ai_embedding_input_audio.py | 158 ++++++++ ...ing_input_audio_infinity_extra_modality.py | 8 + .../models/open_ai_embedding_input_image.py | 158 ++++++++ ...ing_input_image_infinity_extra_modality.py | 8 + ...put.py => open_ai_embedding_input_text.py} | 33 +- ...ding_input_text_infinity_extra_modality.py | 8 + .../infinity_client/poetry.lock | 190 ---------- .../infinity_client/pyproject.toml | 2 +- .../client_infinity/run_generate_with_hook.sh | 38 ++ libs/client_infinity/run_tests_with_hook.sh | 33 +- libs/infinity_emb/Makefile | 16 +- .../infinity_emb/fastapi_schemas/pymodels.py | 88 ++++- .../infinity_emb/inference/batch_handler.py | 20 +- .../infinity_emb/infinity_server.py | 136 ++++++- libs/infinity_emb/infinity_emb/primitives.py | 6 + .../infinity_emb/transformer/audio/utils.py | 8 +- libs/infinity_emb/poetry.lock | 217 ++++++++--- libs/infinity_emb/pyproject.toml | 5 +- libs/infinity_emb/tests/conftest.py | 2 +- .../end_to_end/test_api_with_dummymodel.py | 33 +- .../end_to_end/test_openapi_client_compat.py | 160 ++++++++ .../end_to_end/test_optimum_embedding.py | 4 +- .../end_to_end/test_sentence_transformers.py | 2 +- .../tests/end_to_end/test_torch_audio.py | 70 ++-- .../tests/end_to_end/test_torch_reranker.py | 2 +- .../tests/end_to_end/test_torch_vision.py | 104 +++--- 37 files changed, 1672 insertions(+), 498 deletions(-) create mode 100755 docs/assets/create_cli_v2_docs.sh create mode 100755 docs/assets/create_openapi_with_server_hook.sh create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py rename libs/client_infinity/infinity_client/infinity_client/models/{open_ai_embedding_input.py => open_ai_embedding_input_text.py} (70%) create mode 100644 libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py delete mode 100644 libs/client_infinity/infinity_client/poetry.lock create mode 100755 libs/client_infinity/run_generate_with_hook.sh create mode 100644 libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py diff --git a/docs/assets/create_cli_v2_docs.sh b/docs/assets/create_cli_v2_docs.sh new file mode 100755 index 00000000..b59825d1 --- /dev/null +++ b/docs/assets/create_cli_v2_docs.sh
@@ -0,0 +1,33 @@ +#!/bin/bash + +echo 'Generating CLI v2 documentation...' + +# Get the directory of the script +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Define the output file path relative to the script's location +OUTPUT_FILE="$SCRIPT_DIR/../docs/cli_v2.md" + +# Ensure the output directory exists +mkdir -p "$(dirname "$OUTPUT_FILE")" + +# Write the static content to the output file +cat << EOF > "$OUTPUT_FILE" +# CLI v2 Documentation + +The current version of Infinity uses the following arguments in its CLI: +\`\`\`bash +\$ infinity_emb v2 --help +\`\`\` + +\`\`\` +EOF + +# Append the help output to the file, setting TERMINAL_WIDTH=120 only for this command +TERMINAL_WIDTH=120 poetry run infinity_emb v2 --help >> "$OUTPUT_FILE" 2>&1 + +# Close the code block in the markdown file +echo '```' >> "$OUTPUT_FILE" +echo 'Note: This doc is auto-generated. Do not edit this file directly.' >> "$OUTPUT_FILE" + +echo "CLI v2 documentation generated and saved to $OUTPUT_FILE." \ No newline at end of file diff --git a/docs/assets/create_openapi_with_server_hook.sh b/docs/assets/create_openapi_with_server_hook.sh new file mode 100755 index 00000000..b1ca5d95 --- /dev/null +++ b/docs/assets/create_openapi_with_server_hook.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Function to handle cleanup +cleanup() { + echo "Cleaning up..." + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi +} + +# Set up the trap to run the cleanup function on EXIT or any error +trap cleanup EXIT + +# Start infinity_emb in the background +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done + +# Download the openapi.json +wget http://0.0.0.0:7997/openapi.json -O "$SCRIPT_DIR/openapi.json" \ No newline at end of file diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 6a838fa5..5dcfab07 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip.
Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.58"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]})\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":" Embeddings Image","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n 
})\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_audio":{"post":{"summary":" Embeddings Audio","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw 
Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned 
By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"OpenAIEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance 
Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.59"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports multimodal inputs.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"infinity_extra_modality\": \"image\"\n })\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"infinity_extra_modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format= \"base64\",\n extra_body={\n \"infinity_extra_modality\": \"audio\"\n 
}\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format= \"base64\",\n extra_body={\n \"infinity_extra_modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport 
requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["audio"],"const":"audio","title":"Infinity Extra 
Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["image"],"const":"image","title":"Infinity Extra Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"infinity_extra_modality":{"type":"string","enum":["text"],"const":"text","title":"Infinity Extra Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index 9d020818..1d78f73a 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -1,46 +1,152 @@ # CLI v2 Documentation The current version of Infinity uses the following arguments in its CLI: -Note: The section below is auto-generated by the makefile. - ```bash -infinity_emb v2 --help - - Usage: infinity_emb v2 [OPTIONS] - - Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil - Multiple Model CLI Playbook: - - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` - - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;" - - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8. - -╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --model-id TEXT Huggingface model repo id. 
Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference& [env var: `INFINITY_MODEL_ID`] [default: michaelfeil/bge-small-en-v1.5] │ -│ --served-model-name TEXT the nickname for the API, under which the model_id can be selected [env var: `INFINITY_SERVED_MODEL_NAME`] │ -│ --batch-size INTEGER maximum batch size for inference [env var: `INFINITY_BATCH_SIZE`] [default: 32] │ -│ --revision TEXT huggingface model repo revision. [env var: `INFINITY_REVISION`] │ -│ --trust-remote-code --no-trust-remote-code if potential remote modeling code from huggingface repo is trusted. [env var: `INFINITY_TRUST_REMOTE_CODE`] [default: trust-remote-code] │ -│ --engine [torch|ctranslate2|optimum|debugengine] Which backend to use. `torch` uses Pytorch GPU/CPU, optimum uses ONNX on GPU/CPU/NVIDIA-TensorRT, `CTranslate2` uses torch+ctranslate2 on CPU/GPU. [env var: `INFINITY_ENGINE`] │ -│ [default: torch] │ -│ --model-warmup --no-model-warmup if model should be warmed up after startup, and before ready. [env var: `INFINITY_MODEL_WARMUP`] [default: model-warmup] │ -│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be cached to SQLite for latency improvement. [env var: `INFINITY_VECTOR_DISK_CACHE`] [default: vector-disk-cache] │ -│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the model forward pass. [env var: `INFINITY_DEVICE`] [default: auto] │ -│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based on actual tokenizer count. If false, uses len(input) as proxy. [env var: `INFINITY_LENGTHS_VIA_TOKENIZE`] [default: lengths-via-tokenize] │ -│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. [env var: `INFINITY_DTYPE`] [default: auto] │ -│ --embedding-dtype [float32|int8|uint8|binary|ubinary] dtype post-forward pass. If != `float32`, using Post-Forward Static quantization. [env var: `INFINITY_EMBEDDING_DTYPE`] [default: float32] │ -│ --pooling-method [mean|cls|auto] overwrite the pooling method if inferred incorrectly. [env var: `INFINITY_POOLING_METHOD`] [default: auto] │ -│ --compile --no-compile Enable usage of `torch.compile(dynamic=True)` if engine relies on it. [env var: `INFINITY_COMPILE`] [default: compile] │ -│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 via the `BetterTransformer` implementation. If available for this model. [env var: `INFINITY_BETTERTRANSFORMER`] [default: bettertransformer] │ -│ --preload-only --no-preload-only If true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile. [env var: `INFINITY_PRELOAD_ONLY`] [default: no-preload-only] │ -│ --host TEXT host for the FastAPI uvicorn server [env var: `INFINITY_HOST`] [default: 0.0.0.0] │ -│ --port INTEGER port for the FastAPI uvicorn server [env var: `INFINITY_PORT`] [default: 7997] │ -│ --url-prefix TEXT prefix for all routes of the FastAPI uvicorn server. Useful if you run behind a proxy / cascaded API. [env var: `INFINITY_URL_PREFIX`] │ -│ --redirect-slash TEXT where to redirect `/` requests to. [env var: `INFINITY_REDIRECT_SLASH`] [default: /docs] │ -│ --log-level [critical|error|warning|info|debug|trace] console log level. [env var: `INFINITY_LOG_LEVEL`] [default: info] │ -│ --permissive-cors --no-permissive-cors whether to allow permissive cors. [env var: `INFINITY_PERMISSIVE_CORS`] [default: no-permissive-cors] │ -│ --api-key TEXT api_key used for authentication headers. 
[env var: `INFINITY_API_KEY`] │ -│ --proxy-root-path TEXT Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/ [env var: `INFINITY_PROXY_ROOT_PATH`] │ -│ --help Show this message and exit. │ -╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +$ infinity_emb v2 --help +``` + +``` + + Usage: infinity_emb v2 [OPTIONS] + + Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil + Multiple Model CLI Playbook: + - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` + - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && + INFINITY_BATCH_SIZE="8;4;" + - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size + 8` both models have batch-size 8. + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --model-id TEXT Huggingface model repo id. │ +│ Subset of possible models: │ +│ https://huggingface.co/models… │ +│ [env var: `INFINITY_MODEL_ID`] │ +│ [default: │ +│ michaelfeil/bge-small-en-v1.5] │ +│ --served-model-name TEXT the nickname for the API, │ +│ under which the model_id can │ +│ be selected │ +│ [env var: │ +│ `INFINITY_SERVED_MODEL_NAME`] │ +│ --batch-size INTEGER maximum batch size for │ +│ inference │ +│ [env var: │ +│ `INFINITY_BATCH_SIZE`] │ +│ [default: 32] │ +│ --revision TEXT huggingface model repo │ +│ revision. │ +│ [env var: `INFINITY_REVISION`] │ +│ --trust-remote-code --no-trust-remote-code if potential remote modeling │ +│ code from huggingface repo is │ +│ trusted. │ +│ [env var: │ +│ `INFINITY_TRUST_REMOTE_CODE`] │ +│ [default: trust-remote-code] │ +│ --engine [torch|ctranslate2|optimum|de Which backend to use. `torch` │ +│ bugengine] uses Pytorch GPU/CPU, optimum │ +│ uses ONNX on │ +│ GPU/CPU/NVIDIA-TensorRT, │ +│ `CTranslate2` uses │ +│ torch+ctranslate2 on CPU/GPU. │ +│ [env var: `INFINITY_ENGINE`] │ +│ [default: torch] │ +│ --model-warmup --no-model-warmup if model should be warmed up │ +│ after startup, and before │ +│ ready. │ +│ [env var: │ +│ `INFINITY_MODEL_WARMUP`] │ +│ [default: model-warmup] │ +│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results │ +│ should be cached to SQLite for │ +│ latency improvement. │ +│ [env var: │ +│ `INFINITY_VECTOR_DISK_CACHE`] │ +│ [default: vector-disk-cache] │ +│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing │ +│ the model forward pass. │ +│ [env var: `INFINITY_DEVICE`] │ +│ [default: auto] │ +│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is │ +│ based on actual tokenizer │ +│ count. If false, uses │ +│ len(input) as proxy. │ +│ [env var: │ +│ `INFINITY_LENGTHS_VIA_TOKENIZ… │ +│ [default: │ +│ lengths-via-tokenize] │ +│ --dtype [float32|float16|int8|fp8|aut dtype for the model weights. │ +│ o] [env var: `INFINITY_DTYPE`] │ +│ [default: auto] │ +│ --embedding-dtype [float32|int8|uint8|binary|ub dtype post-forward pass. If != │ +│ inary] `float32`, using Post-Forward │ +│ Static quantization. 
│ +│ [env var: │ +│ `INFINITY_EMBEDDING_DTYPE`] │ +│ [default: float32] │ +│ --pooling-method [mean|cls|auto] overwrite the pooling method │ +│ if inferred incorrectly. │ +│ [env var: │ +│ `INFINITY_POOLING_METHOD`] │ +│ [default: auto] │ +│ --compile --no-compile Enable usage of │ +│ `torch.compile(dynamic=True)` │ +│ if engine relies on it. │ +│ [env var: `INFINITY_COMPILE`] │ +│ [default: compile] │ +│ --bettertransformer --no-bettertransformer Enables varlen │ +│ flash-attention-2 via the │ +│ `BetterTransformer` │ +│ implementation. If available │ +│ for this model. │ +│ [env var: │ +│ `INFINITY_BETTERTRANSFORMER`] │ +│ [default: bettertransformer] │ +│ --preload-only --no-preload-only If true, only downloads models │ +│ and verifies setup, then exit. │ +│ Recommended for pre-caching │ +│ the download in a Dockerfile. │ +│ [env var: │ +│ `INFINITY_PRELOAD_ONLY`] │ +│ [default: no-preload-only] │ +│ --host TEXT host for the FastAPI uvicorn │ +│ server │ +│ [env var: `INFINITY_HOST`] │ +│ [default: 0.0.0.0] │ +│ --port INTEGER port for the FastAPI uvicorn │ +│ server │ +│ [env var: `INFINITY_PORT`] │ +│ [default: 7997] │ +│ --url-prefix TEXT prefix for all routes of the │ +│ FastAPI uvicorn server. Useful │ +│ if you run behind a proxy / │ +│ cascaded API. │ +│ [env var: │ +│ `INFINITY_URL_PREFIX`] │ +│ --redirect-slash TEXT where to redirect `/` requests │ +│ to. │ +│ [env var: │ +│ `INFINITY_REDIRECT_SLASH`] │ +│ [default: /docs] │ +│ --log-level [critical|error|warning|info| console log level. │ +│ debug|trace] [env var: │ +│ `INFINITY_LOG_LEVEL`] │ +│ [default: info] │ +│ --permissive-cors --no-permissive-cors whether to allow permissive │ +│ cors. │ +│ [env var: │ +│ `INFINITY_PERMISSIVE_CORS`] │ +│ [default: no-permissive-cors] │ +│ --api-key TEXT api_key used for │ +│ authentication headers. │ +│ [env var: `INFINITY_API_KEY`] │ +│ --proxy-root-path TEXT Proxy prefix for the │ +│ application. See: │ +│ https://fastapi.tiangolo.com/… │ +│ [env var: │ +│ `INFINITY_PROXY_ROOT_PATH`] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` +Note: This doc is auto-generated. Do not edit this file directly. diff --git a/libs/client_infinity/Makefile b/libs/client_infinity/Makefile index 9f29710e..0e0b0b51 100644 --- a/libs/client_infinity/Makefile +++ b/libs/client_infinity/Makefile @@ -1,12 +1,7 @@ .PHONY: generate tests generate: - pip install openapi-python-client==0.21.1 - openapi-python-client generate \ - --url http://0.0.0.0:7997/openapi.json \ - --config client_config.yaml \ - --overwrite \ - --custom-template-path=./template + ./run_generate_with_hook.sh tests: ./run_tests_with_hook.sh diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py index d1e9e995..e2dfd6de 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings.py @@ -6,14 +6,16 @@ from ... 
import errors from ...client import AuthenticatedClient, Client from ...models.http_validation_error import HTTPValidationError -from ...models.open_ai_embedding_input import OpenAIEmbeddingInput +from ...models.open_ai_embedding_input_audio import OpenAIEmbeddingInputAudio +from ...models.open_ai_embedding_input_image import OpenAIEmbeddingInputImage +from ...models.open_ai_embedding_input_text import OpenAIEmbeddingInputText from ...models.open_ai_embedding_result import OpenAIEmbeddingResult from ...types import Response def _get_kwargs( *, - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Dict[str, Any]: headers: Dict[str, Any] = {} @@ -22,7 +24,13 @@ def _get_kwargs( "url": "/embeddings", } - _body = body.to_dict() + _body: Dict[str, Any] + if isinstance(body, OpenAIEmbeddingInputText): + _body = body.to_dict() + elif isinstance(body, OpenAIEmbeddingInputAudio): + _body = body.to_dict() + else: + _body = body.to_dict() _kwargs["json"] = _body headers["Content-Type"] = "application/json" @@ -62,20 +70,93 @@ def _build_response( def sync_detailed( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + + ## Running Text Embeddings + ```python + import requests, base64 + requests.post(\"http://..:7997/embeddings\", + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + ## Running Image Embeddings ```python - import requests requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound 
of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -99,20 +180,93 @@ def sync( def sync( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an 
undocumented status code and Client.raise_on_unexpected_status is True. @@ -131,20 +285,93 @@ def sync( async def asyncio_detailed( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + }) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -166,20 +393,93 @@ async def asyncio_detailed( async def asyncio( *, client: Union[AuthenticatedClient, Client], - body: OpenAIEmbeddingInput, + body: Union["OpenAIEmbeddingInputAudio", "OpenAIEmbeddingInputImage", "OpenAIEmbeddingInputText"], ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: r"""Embeddings - Encode Embeddings + Encode Embeddings. Supports multimodal inputs. 
+ ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post(\"http://..:7997/embeddings\", - json={\"model\":\"BAAI/bge-small-en-v1.5\",\"input\":[\"A sentence to encode.\"]}) + json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]}) + ``` + + ## Running Image Embeddings + ```python + requests.post(\"http://..:7997/embeddings\", + json={ + \"model\": \"openai/clip-vit-base-patch32\", + \"encoding_format\": \"base64\", + \"input\": [ + \"http://images.cocodataset.org/val2017/000000039769.jpg\", + # can also be base64 encoded + ], + # set extra modality to image to process as image + \"infinity_extra_modality\": \"image\" + } + ) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/in + finity_emb/tests/data/audio/beep.wav\" + + def url_to_base64(url, modality = \"image\"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f\"{modality}/{url.split('.')[-1]}\" + return f\"data:{mimetype};base64,{base64_encoded}\" + + requests.post(\"http://localhost:7997/embeddings\", + json={ + \"model\": \"laion/larger_clap_general\", + \"encoding_format\": \"float\", + \"input\": [ + url, url_to_base64(url, \"audio\") + ], + # set extra modality to audio to process as audio + \"infinity_extra_modality\": \"audio\" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url=\"http://localhost:7997/\") + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[url_to_base64(url, \"audio\")], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"audio\" + } + ) + + client.embeddings.create( + model=\"laion/larger_clap_general\", + input=[\"the sound of a beep\", \"the sound of a cat\"], + encoding_format= \"base64\", + extra_body={ + \"infinity_extra_modality\": \"text\" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id + laion/larger_clap_general ``` Args: - body (OpenAIEmbeddingInput): + body (Union['OpenAIEmbeddingInputAudio', 'OpenAIEmbeddingInputImage', + 'OpenAIEmbeddingInputText']): Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
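For orientation, here is a minimal sketch of calling the regenerated client module above with the new union body types. The import paths follow the layout that openapi-python-client usually emits (`infinity_client.Client`, `infinity_client.api.default.embeddings`, models re-exported from `infinity_client.models`); treat them as assumptions rather than a verified API surface.

```python
# Hedged sketch: calling the unified `embeddings` route through the generated
# client. Import paths are assumed from openapi-python-client conventions.
from infinity_client import Client
from infinity_client.api.default import embeddings
from infinity_client.models import OpenAIEmbeddingInputText

client = Client(base_url="http://localhost:7997")

# `infinity_extra_modality` defaults to "text" on this model class,
# so a plain text request only needs `input_` and `model`.
body = OpenAIEmbeddingInputText(
    input_=["A sentence to encode."],
    model="BAAI/bge-small-en-v1.5",
)
result = embeddings.sync(client=client, body=body)
print(result)
```

Because each body class carries its modality as a default, choosing the class (`OpenAIEmbeddingInputText`, `OpenAIEmbeddingInputImage`, or `OpenAIEmbeddingInputAudio`) is enough to route the request; no extra field needs to be set.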
diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py index 95e8ed0e..ff2ece66 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_audio.py @@ -64,7 +64,7 @@ def sync_detailed( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -84,7 +84,7 @@ def sync_detailed( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -110,7 +110,7 @@ def sync( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -130,7 +130,7 @@ def sync( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -151,7 +151,7 @@ async def asyncio_detailed( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -171,7 +171,7 @@ async def asyncio_detailed( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -195,7 +195,7 @@ async def asyncio( client: Union[AuthenticatedClient, Client], body: AudioEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Audio + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio` Encode Embeddings from Audio files @@ -215,7 +215,7 @@ async def asyncio( ``` Args: - body (AudioEmbeddingInput): + body (AudioEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. 
diff --git a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py index 71f36ad7..9bceddb9 100644 --- a/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py +++ b/libs/client_infinity/infinity_client/infinity_client/api/default/embeddings_image.py @@ -64,7 +64,7 @@ def sync_detailed( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -83,7 +83,7 @@ def sync_detailed( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -109,7 +109,7 @@ def sync( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -128,7 +128,7 @@ def sync( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -149,7 +149,7 @@ async def asyncio_detailed( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Response[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -168,7 +168,7 @@ async def asyncio_detailed( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. @@ -192,7 +192,7 @@ async def asyncio( client: Union[AuthenticatedClient, Client], body: ImageEmbeddingInput, ) -> Optional[Union[HTTPValidationError, OpenAIEmbeddingResult]]: - r"""Embeddings Image + r"""Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image` Encode Embeddings from Image files @@ -211,7 +211,7 @@ async def asyncio( ``` Args: - body (ImageEmbeddingInput): + body (ImageEmbeddingInput): # LEGACY Raises: errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. 
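Both legacy routes above stay callable but are now flagged `deprecated` in the OpenAPI schema (see the `deprecated=True` flags added in `infinity_server.py` further down). As a rough migration sketch, assuming a local server on port 7997, the equivalent request against the unified `/embeddings` route differs only in the path and the added `infinity_extra_modality` field:

```python
# Hedged migration sketch from the deprecated modality-specific routes to the
# unified /embeddings route; server address and model names are examples.
import requests

base = "http://localhost:7997"
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Before (deprecated): dedicated image endpoint.
requests.post(f"{base}/embeddings_image",
    json={"model": "openai/clip-vit-base-patch32", "input": [image_url]})

# After: unified endpoint, with the modality made explicit.
requests.post(f"{base}/embeddings",
    json={"model": "openai/clip-vit-base-patch32",
          "input": [image_url],
          "infinity_extra_modality": "image"})

# The audio migration is identical, with "audio" in place of "image".
```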
diff --git a/libs/client_infinity/infinity_client/infinity_client/models/__init__.py b/libs/client_infinity/infinity_client/infinity_client/models/__init__.py index 16ea6139..8a58b644 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/__init__.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/__init__.py @@ -13,7 +13,12 @@ from .model_info import ModelInfo from .model_info_object import ModelInfoObject from .model_info_owned_by import ModelInfoOwnedBy -from .open_ai_embedding_input import OpenAIEmbeddingInput +from .open_ai_embedding_input_audio import OpenAIEmbeddingInputAudio +from .open_ai_embedding_input_audio_infinity_extra_modality import OpenAIEmbeddingInputAudioInfinityExtraModality +from .open_ai_embedding_input_image import OpenAIEmbeddingInputImage +from .open_ai_embedding_input_image_infinity_extra_modality import OpenAIEmbeddingInputImageInfinityExtraModality +from .open_ai_embedding_input_text import OpenAIEmbeddingInputText +from .open_ai_embedding_input_text_infinity_extra_modality import OpenAIEmbeddingInputTextInfinityExtraModality from .open_ai_embedding_result import OpenAIEmbeddingResult from .open_ai_embedding_result_object import OpenAIEmbeddingResultObject from .open_ai_model_info import OpenAIModelInfo @@ -40,7 +45,12 @@ "ModelInfo", "ModelInfoObject", "ModelInfoOwnedBy", - "OpenAIEmbeddingInput", + "OpenAIEmbeddingInputAudio", + "OpenAIEmbeddingInputAudioInfinityExtraModality", + "OpenAIEmbeddingInputImage", + "OpenAIEmbeddingInputImageInfinityExtraModality", + "OpenAIEmbeddingInputText", + "OpenAIEmbeddingInputTextInfinityExtraModality", "OpenAIEmbeddingResult", "OpenAIEmbeddingResultObject", "OpenAIModelInfo", diff --git a/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py index d4f6d247..11986996 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/audio_embedding_input.py @@ -11,7 +11,8 @@ @_attrs_define class AudioEmbeddingInput: - """ + """# LEGACY + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. diff --git a/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py index a75a5801..c91d1ff4 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/image_embedding_input.py @@ -11,7 +11,8 @@ @_attrs_define class ImageEmbeddingInput: - """ + """# LEGACY + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. 
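The re-exported per-modality client models each carry their discriminator as a default value, so the serialized payload advertises its modality without extra client logic. A small sketch of the expected serialization, based on the generated `to_dict()`/`from_dict()` shown below (the data URI is a placeholder, and the exact payload shape is an assumption):

```python
# Hedged sketch: default serialization of one of the new client models.
from infinity_client.models import OpenAIEmbeddingInputAudio

body = OpenAIEmbeddingInputAudio(input_=["data:audio/wav;base64,..."])
payload = body.to_dict()
# Expected (assumed) payload: unset fields are omitted, defaults are kept:
# {"input": ["data:audio/wav;base64,..."],
#  "model": "default/not-specified",
#  "infinity_extra_modality": "audio"}
assert payload["infinity_extra_modality"] == "audio"
```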
diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py new file mode 100644 index 00000000..0653500d --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py @@ -0,0 +1,158 @@ +from typing import Any, Dict, List, Type, TypeVar, Union, cast + +from attrs import define as _attrs_define +from attrs import field as _attrs_field + +from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_audio_infinity_extra_modality import ( + OpenAIEmbeddingInputAudioInfinityExtraModality, +) +from ..types import UNSET, Unset + +T = TypeVar("T", bound="OpenAIEmbeddingInputAudio") + + +@_attrs_define +class OpenAIEmbeddingInputAudio: + """ + Attributes: + input_ (Union[List[str], str]): + model (Union[Unset, str]): Default: 'default/not-specified'. + encoding_format (Union[Unset, EmbeddingEncodingFormat]): + user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputAudioInfinityExtraModality]): Default: + OpenAIEmbeddingInputAudioInfinityExtraModality.AUDIO. + """ + + input_: Union[List[str], str] + model: Union[Unset, str] = "default/not-specified" + encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET + user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputAudioInfinityExtraModality + ] = OpenAIEmbeddingInputAudioInfinityExtraModality.AUDIO + additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) + + def to_dict(self) -> Dict[str, Any]: + input_: Union[List[str], str] + if isinstance(self.input_, list): + input_ = [] + for input_type_0_item_data in self.input_: + input_type_0_item: str + input_type_0_item = input_type_0_item_data + input_.append(input_type_0_item) + + else: + input_ = self.input_ + + model = self.model + + encoding_format: Union[Unset, str] = UNSET + if not isinstance(self.encoding_format, Unset): + encoding_format = self.encoding_format.value + + user: Union[None, Unset, str] + if isinstance(self.user, Unset): + user = UNSET + else: + user = self.user + + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + + field_dict: Dict[str, Any] = {} + field_dict.update(self.additional_properties) + field_dict.update( + { + "input": input_, + } + ) + if model is not UNSET: + field_dict["model"] = model + if encoding_format is not UNSET: + field_dict["encoding_format"] = encoding_format + if user is not UNSET: + field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality + + return field_dict + + @classmethod + def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T: + d = src_dict.copy() + + def _parse_input_(data: object) -> Union[List[str], str]: + try: + if not isinstance(data, list): + raise TypeError() + input_type_0 = [] + _input_type_0 = data + for input_type_0_item_data in _input_type_0: + + def _parse_input_type_0_item(data: object) -> str: + return cast(str, data) + + input_type_0_item = _parse_input_type_0_item(input_type_0_item_data) + + input_type_0.append(input_type_0_item) + + return input_type_0 + except: # noqa: E722 + pass + return cast(Union[List[str], str], data) + + input_ = _parse_input_(d.pop("input")) + + model = 
d.pop("model", UNSET) + + _encoding_format = d.pop("encoding_format", UNSET) + encoding_format: Union[Unset, EmbeddingEncodingFormat] + if isinstance(_encoding_format, Unset): + encoding_format = UNSET + else: + encoding_format = EmbeddingEncodingFormat(_encoding_format) + + def _parse_user(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + user = _parse_user(d.pop("user", UNSET)) + + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputAudioInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputAudioInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_audio = cls( + input_=input_, + model=model, + encoding_format=encoding_format, + user=user, + infinity_extra_modality=infinity_extra_modality, + ) + + open_ai_embedding_input_audio.additional_properties = d + return open_ai_embedding_input_audio + + @property + def additional_keys(self) -> List[str]: + return list(self.additional_properties.keys()) + + def __getitem__(self, key: str) -> Any: + return self.additional_properties[key] + + def __setitem__(self, key: str, value: Any) -> None: + self.additional_properties[key] = value + + def __delitem__(self, key: str) -> None: + del self.additional_properties[key] + + def __contains__(self, key: str) -> bool: + return key in self.additional_properties diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py new file mode 100644 index 00000000..257de996 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputAudioInfinityExtraModality(str, Enum): + AUDIO = "audio" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py new file mode 100644 index 00000000..d2b70311 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py @@ -0,0 +1,158 @@ +from typing import Any, Dict, List, Type, TypeVar, Union, cast + +from attrs import define as _attrs_define +from attrs import field as _attrs_field + +from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_image_infinity_extra_modality import ( + OpenAIEmbeddingInputImageInfinityExtraModality, +) +from ..types import UNSET, Unset + +T = TypeVar("T", bound="OpenAIEmbeddingInputImage") + + +@_attrs_define +class OpenAIEmbeddingInputImage: + """ + Attributes: + input_ (Union[List[str], str]): + model (Union[Unset, str]): Default: 'default/not-specified'. + encoding_format (Union[Unset, EmbeddingEncodingFormat]): + user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputImageInfinityExtraModality]): Default: + OpenAIEmbeddingInputImageInfinityExtraModality.IMAGE. 
+ """ + + input_: Union[List[str], str] + model: Union[Unset, str] = "default/not-specified" + encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET + user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputImageInfinityExtraModality + ] = OpenAIEmbeddingInputImageInfinityExtraModality.IMAGE + additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) + + def to_dict(self) -> Dict[str, Any]: + input_: Union[List[str], str] + if isinstance(self.input_, list): + input_ = [] + for input_type_0_item_data in self.input_: + input_type_0_item: str + input_type_0_item = input_type_0_item_data + input_.append(input_type_0_item) + + else: + input_ = self.input_ + + model = self.model + + encoding_format: Union[Unset, str] = UNSET + if not isinstance(self.encoding_format, Unset): + encoding_format = self.encoding_format.value + + user: Union[None, Unset, str] + if isinstance(self.user, Unset): + user = UNSET + else: + user = self.user + + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + + field_dict: Dict[str, Any] = {} + field_dict.update(self.additional_properties) + field_dict.update( + { + "input": input_, + } + ) + if model is not UNSET: + field_dict["model"] = model + if encoding_format is not UNSET: + field_dict["encoding_format"] = encoding_format + if user is not UNSET: + field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality + + return field_dict + + @classmethod + def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T: + d = src_dict.copy() + + def _parse_input_(data: object) -> Union[List[str], str]: + try: + if not isinstance(data, list): + raise TypeError() + input_type_0 = [] + _input_type_0 = data + for input_type_0_item_data in _input_type_0: + + def _parse_input_type_0_item(data: object) -> str: + return cast(str, data) + + input_type_0_item = _parse_input_type_0_item(input_type_0_item_data) + + input_type_0.append(input_type_0_item) + + return input_type_0 + except: # noqa: E722 + pass + return cast(Union[List[str], str], data) + + input_ = _parse_input_(d.pop("input")) + + model = d.pop("model", UNSET) + + _encoding_format = d.pop("encoding_format", UNSET) + encoding_format: Union[Unset, EmbeddingEncodingFormat] + if isinstance(_encoding_format, Unset): + encoding_format = UNSET + else: + encoding_format = EmbeddingEncodingFormat(_encoding_format) + + def _parse_user(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + user = _parse_user(d.pop("user", UNSET)) + + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputImageInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputImageInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_image = cls( + input_=input_, + model=model, + encoding_format=encoding_format, + user=user, + infinity_extra_modality=infinity_extra_modality, + ) + + open_ai_embedding_input_image.additional_properties = d + return open_ai_embedding_input_image + + @property + def additional_keys(self) -> List[str]: + return list(self.additional_properties.keys()) + + def 
__getitem__(self, key: str) -> Any: + return self.additional_properties[key] + + def __setitem__(self, key: str, value: Any) -> None: + self.additional_properties[key] = value + + def __delitem__(self, key: str) -> None: + del self.additional_properties[key] + + def __contains__(self, key: str) -> bool: + return key in self.additional_properties diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py new file mode 100644 index 00000000..20a57588 --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputImageInfinityExtraModality(str, Enum): + IMAGE = "image" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py similarity index 70% rename from libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py rename to libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py index 756d91b8..2396a2f8 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py @@ -4,25 +4,32 @@ from attrs import field as _attrs_field from ..models.embedding_encoding_format import EmbeddingEncodingFormat +from ..models.open_ai_embedding_input_text_infinity_extra_modality import OpenAIEmbeddingInputTextInfinityExtraModality from ..types import UNSET, Unset -T = TypeVar("T", bound="OpenAIEmbeddingInput") +T = TypeVar("T", bound="OpenAIEmbeddingInputText") @_attrs_define -class OpenAIEmbeddingInput: - """ +class OpenAIEmbeddingInputText: + """helper + Attributes: input_ (Union[List[str], str]): model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + infinity_extra_modality (Union[Unset, OpenAIEmbeddingInputTextInfinityExtraModality]): Default: + OpenAIEmbeddingInputTextInfinityExtraModality.TEXT. 
""" input_: Union[List[str], str] model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + infinity_extra_modality: Union[ + Unset, OpenAIEmbeddingInputTextInfinityExtraModality + ] = OpenAIEmbeddingInputTextInfinityExtraModality.TEXT additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -45,6 +52,10 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + infinity_extra_modality: Union[Unset, str] = UNSET + if not isinstance(self.infinity_extra_modality, Unset): + infinity_extra_modality = self.infinity_extra_modality.value + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -58,6 +69,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if infinity_extra_modality is not UNSET: + field_dict["infinity_extra_modality"] = infinity_extra_modality return field_dict @@ -96,15 +109,23 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) - open_ai_embedding_input = cls( + _infinity_extra_modality = d.pop("infinity_extra_modality", UNSET) + infinity_extra_modality: Union[Unset, OpenAIEmbeddingInputTextInfinityExtraModality] + if isinstance(_infinity_extra_modality, Unset): + infinity_extra_modality = UNSET + else: + infinity_extra_modality = OpenAIEmbeddingInputTextInfinityExtraModality(_infinity_extra_modality) + + open_ai_embedding_input_text = cls( input_=input_, model=model, encoding_format=encoding_format, user=user, + infinity_extra_modality=infinity_extra_modality, ) - open_ai_embedding_input.additional_properties = d - return open_ai_embedding_input + open_ai_embedding_input_text.additional_properties = d + return open_ai_embedding_input_text @property def additional_keys(self) -> List[str]: diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py new file mode 100644 index 00000000..2d38fd9e --- /dev/null +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text_infinity_extra_modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class OpenAIEmbeddingInputTextInfinityExtraModality(str, Enum): + TEXT = "text" + + def __str__(self) -> str: + return str(self.value) diff --git a/libs/client_infinity/infinity_client/poetry.lock b/libs/client_infinity/infinity_client/poetry.lock deleted file mode 100644 index b546f420..00000000 --- a/libs/client_infinity/infinity_client/poetry.lock +++ /dev/null @@ -1,190 +0,0 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
- -[[package]] -name = "anyio" -version = "4.5.0" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.8" -files = [ - {file = "anyio-4.5.0-py3-none-any.whl", hash = "sha256:fdeb095b7cc5a5563175eedd926ec4ae55413bb4be5770c424af0ba46ccb4a78"}, - {file = "anyio-4.5.0.tar.gz", hash = "sha256:c5a275fe5ca0afd788001f58fca1e69e29ce706d746e317d660e21f70c530ef9"}, -] - -[package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} -idna = ">=2.8" -sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} - -[package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] -trio = ["trio (>=0.26.1)"] - -[[package]] -name = "attrs" -version = "24.2.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.7" -files = [ - {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, - {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, -] - -[package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] - -[[package]] -name = "certifi" -version = "2024.8.30" -description = "Python package for providing Mozilla's CA Bundle." 
-optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - -[[package]] -name = "httpcore" -version = "1.0.5" -description = "A minimal low-level HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, -] - -[package.dependencies] -certifi = "*" -h11 = ">=0.13,<0.15" - -[package.extras] -asyncio = ["anyio (>=4.0,<5.0)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] - -[[package]] -name = "httpx" -version = "0.27.2" -description = "The next generation HTTP client." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, - {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, -] - -[package.dependencies] -anyio = "*" -certifi = "*" -httpcore = "==1.*" -idna = "*" -sniffio = "*" - -[package.extras] -brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "idna" -version = "3.10" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.6" -files = [ - {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, - {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, -] - -[package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[metadata] -lock-version = "2.0" -python-versions = "^3.8" -content-hash = "d60bbf780385a47ffdf4a33a182fae5677d1ce66444290d6c2a48ba544f347da" diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index 2952170b..46e56b77 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.59" +version = "0.0.58" description = "A client library for accessing ♾️ Infinity - 
Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/client_infinity/run_generate_with_hook.sh b/libs/client_infinity/run_generate_with_hook.sh new file mode 100755 index 00000000..6c316293 --- /dev/null +++ b/libs/client_infinity/run_generate_with_hook.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -euo pipefail + +# Function to handle cleanup +cleanup() { + echo "Cleaning up..." + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi +} + +# Set up the trap to run the cleanup function on EXIT or any error +trap cleanup EXIT + +# Start infinity_emb in the background +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done + +# Run the tests +cd infinity_client && \ +poetry install && \ +poetry run pip install pytest requests && \ +poetry run python -m pytest ../tests + +# Cleanup will be called due to the trap \ No newline at end of file diff --git a/libs/client_infinity/run_tests_with_hook.sh b/libs/client_infinity/run_tests_with_hook.sh index b42c4a2b..5ea1d773 100755 --- a/libs/client_infinity/run_tests_with_hook.sh +++ b/libs/client_infinity/run_tests_with_hook.sh @@ -1,21 +1,40 @@ #!/bin/bash +set -euo pipefail + # Function to handle cleanup cleanup() { echo "Cleaning up..." - pkill -f infinity_emb + if [[ -n "${INFINITY_PID:-}" ]]; then + kill "$INFINITY_PID" + fi } # Set up the trap to run the cleanup function on EXIT or any error trap cleanup EXIT # Start infinity_emb in the background -infinity_emb v2 --log-level error & -echo "infinity_emb started with PID $!" +infinity_emb v2 --log-level error --engine debugengine & +INFINITY_PID=$! +echo "infinity_emb started with PID $INFINITY_PID" + +# Wait for infinity_emb to be ready +for i in {1..10}; do + if wget -q --spider http://0.0.0.0:7997/openapi.json; then + echo "infinity_emb is ready." + break + else + echo "Waiting for infinity_emb to be ready..." + sleep 1 + fi +done # Run the tests -cd infinity_client && \ -poetry install && \ -poetry run pip install pytest requests && \ -poetry run python -m pytest ../tests +pip install openapi-python-client==0.21.1 + openapi-python-client generate \ + --url http://0.0.0.0:7997/openapi.json \ + --config client_config.yaml \ + --overwrite \ + --custom-template-path=./template + # Cleanup will be called due to the trap \ No newline at end of file diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile index edd74370..b9e6faa4 100644 --- a/libs/infinity_emb/Makefile +++ b/libs/infinity_emb/Makefile @@ -3,7 +3,7 @@ # Default target executed when no arguments are given to make. all: help -precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs test +precommit : | format spell_fix spell_check lint poetry_check cli_v2_docs openapi test ###################### # TESTING AND COVERAGE @@ -22,7 +22,7 @@ test tests: poetry run pytest openapi: - wget http://0.0.0.0:7997/openapi.json -O ../../docs/assets/openapi.json + ./../../docs/assets/create_openapi_with_server_hook.sh ###################### # LINTING AND FORMATTING @@ -60,17 +60,7 @@ benchmark_embed: tests/data/benchmark/benchmark_embed.json # Generate CLI v2 documentation cli_v2_docs: - @echo 'Generating CLI v2 documentation...' 
- @echo '# CLI v2 Documentation' > ../../docs/docs/cli_v2.md - @echo >> ../../docs/docs/cli_v2.md - @echo 'The current version of Infinity uses the following arguments in its CLI:' >> ../../docs/docs/cli_v2.md - @echo 'Note: The section below is auto-generated by the makefile.' >> ../../docs/docs/cli_v2.md - @echo >> ../../docs/docs/cli_v2.md - @echo '```bash' >> ../../docs/docs/cli_v2.md - @echo '$ infinity_emb v2 --help' >> ../../docs/docs/cli_v2.md - poetry run infinity_emb v2 --help >> ../../docs/docs/cli_v2.md - @echo '```' >> ../../docs/docs/cli_v2.md - @echo 'CLI v2 documentation generated and saved to ../../docs/docs/cli_v2.md.' + ./../../docs/assets/create_cli_v2_docs.sh ###################### # HELP diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index 2541f44b..53ce192a 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -15,7 +15,7 @@ from infinity_emb.primitives import ClassifyReturnType, EmbeddingReturnType from infinity_emb._optional_imports import CHECK_PYDANTIC -from infinity_emb.primitives import EmbeddingEncodingFormat +from infinity_emb.primitives import EmbeddingEncodingFormat, Modality # potential backwards compatibility to pydantic 1.X # pydantic 2.x is preferred but not strictly needed @@ -23,6 +23,14 @@ from pydantic import BaseModel, Field, conlist try: + from pydantic import ( + BaseModel, + Discriminator, + Field, + RootModel, + Tag, + ) + from .data_uri import DataURI from .pydantic_v2 import ( INPUT_STRING, @@ -51,6 +59,15 @@ class BaseModel: # type: ignore[no-redef] pass + class RootModel: # type: ignore + pass + + class Tag: # type: ignore + pass + + class HttpUrl: # type: ignore + pass + class DataURI: # type: ignore pass @@ -66,7 +83,15 @@ class _Usage(BaseModel): total_tokens: int -class OpenAIEmbeddingInput(BaseModel): +class _OpenAIEmbeddingInput(BaseModel): + model: str = "default/not-specified" + encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float + user: Optional[str] = None + + +class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): + """helper""" + input: Union[ # type: ignore conlist( # type: ignore Annotated[str, INPUT_STRING], @@ -74,12 +99,56 @@ class OpenAIEmbeddingInput(BaseModel): ), Annotated[str, INPUT_STRING], ] - model: str = "default/not-specified" - encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float - user: Optional[str] = None + infinity_extra_modality: Literal[Modality.text] = Modality.text # type: ignore + + +class _OpenAIEmbeddingInput_URI(_OpenAIEmbeddingInput): + """helper""" + + input: Union[ # type: ignore + conlist( # type: ignore + DataURIorURL, + **ITEMS_LIMIT_SMALL, + ), + DataURIorURL, + ] + + +class OpenAIEmbeddingInput_Audio(_OpenAIEmbeddingInput_URI): + infinity_extra_modality: Literal[Modality.audio] = Modality.audio # type: ignore + + +class OpenAIEmbeddingInput_Image(_OpenAIEmbeddingInput_URI): + infinity_extra_modality: Literal[Modality.image] = Modality.image # type: ignore + + +def get_infinity_extra_modality(obj: dict) -> str: + """Resolve the modality of the extra_body.
+ If not present, default to text + + Function name is used to return error message, keep it explicit + """ + try: + return obj.get("infinity_extra_modality", Modality.text.value) + except AttributeError: + # in case a very weird request is sent, validate it against the default + return Modality.text.value + + +class MultiModalOpenAIEmbedding(RootModel): + root: Annotated[ + Union[ + Annotated[_OpenAIEmbeddingInput_Text, Tag(Modality.text.value)], + Annotated[OpenAIEmbeddingInput_Audio, Tag(Modality.audio.value)], + Annotated[OpenAIEmbeddingInput_Image, Tag(Modality.image.value)], + ], + Discriminator(get_infinity_extra_modality), + ] class ImageEmbeddingInput(BaseModel): + """LEGACY, NO LONGER UPDATED""" + input: Union[ # type: ignore conlist( # type: ignore DataURIorURL, @@ -93,6 +162,8 @@ class ImageEmbeddingInput(BaseModel): class AudioEmbeddingInput(ImageEmbeddingInput): + """LEGACY, NO LONGER UPDATED""" + pass @@ -118,9 +189,10 @@ def to_embeddings_response( encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float, ) -> dict[str, Union[str, list[dict], dict]]: if encoding_format == EmbeddingEncodingFormat.base64: - assert ( - not engine_args.embedding_dtype.uses_bitpacking() - ), f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}" + if engine_args.embedding_dtype.uses_bitpacking(): + raise ValueError( + f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}" + ) embeddings = [base64.b64encode(np.frombuffer(emb.astype(np.float32), dtype=np.float32)) for emb in embeddings] # type: ignore return dict( diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 427fceba..507bac88 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -138,8 +138,8 @@ async def embed( """ if "embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `embed`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `embed`. " + f"Options are {self.model_worker.capabilities}." ) input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] @@ -166,8 +166,8 @@ async def rerank( """ if "rerank" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `rerank`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `rerank`. " + f"Options are {self.model_worker.capabilities}." ) rerankables = [ReRankSingle(query=query, document=doc) for doc in docs] scores, usage = await self._schedule(rerankables) @@ -197,8 +197,8 @@ async def classify( """ if "classify" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `classify`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `classify`. " + f"Options are {self.model_worker.capabilities}." ) items = [PredictSingle(sentence=s) for s in sentences] classifications, usage = await self._schedule(items) @@ -230,8 +230,8 @@ async def image_embed( if "image_embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `image_embed`." - f"options are {self.model_worker.capabilities}."
+ "the loaded model cannot fulfill `image_embed`. " + f"Options are {self.model_worker.capabilities}." ) items = await resolve_images(images) @@ -259,8 +259,8 @@ async def audio_embed( if "audio_embed" not in self.model_worker.capabilities: raise ModelNotDeployedError( - "the loaded moded cannot fullyfill `audio_embed`." - f"options are {self.model_worker.capabilities}." + "the loaded model cannot fulfill `audio_embed`. " + f"Options are {self.model_worker.capabilities}." ) items = await resolve_audios( diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 512d3096..405ff4ab 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -21,7 +21,7 @@ ClassifyResult, DataURIorURL, ImageEmbeddingInput, - OpenAIEmbeddingInput, + MultiModalOpenAIEmbedding, OpenAIEmbeddingResult, OpenAIModelInfo, RerankInput, @@ -35,6 +35,7 @@ EmbeddingDtype, ImageCorruption, InferenceEngine, + Modality, ModelNotDeployedError, PoolingMethod, ) @@ -224,25 +225,116 @@ def _resolve_mixed_input( dependencies=route_dependencies, operation_id="embeddings", ) - async def _embeddings(data: OpenAIEmbeddingInput): - """Encode Embeddings + async def _embeddings(data: MultiModalOpenAIEmbedding): + """Encode Embeddings. Supports multimodal inputs. + ## Running Text Embeddings ```python - import requests + import requests, base64 requests.post("http://..:7997/embeddings", - json={"model":"BAAI/bge-small-en-v1.5","input":["A sentence to encode."]}) + json={"model":"openai/clip-vit-base-patch32","input":["Two cute cats."]}) + ``` + + ## Running Image Embeddings + ```python + requests.post("http://..:7997/embeddings", + json={ + "model": "openai/clip-vit-base-patch32", + "encoding_format": "base64", + "input": [ + "http://images.cocodataset.org/val2017/000000039769.jpg", + # can also be base64 encoded + ], + # set extra modality to image to process as image + "infinity_extra_modality": "image" + } + ) + ``` + + ## Running Audio Embeddings + ```python + import requests, base64 + url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + + def url_to_base64(url, modality = "image"): + '''small helper to convert url to base64 without server requiring access to the url''' + response = requests.get(url) + response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode('utf-8') + mimetype = f"{modality}/{url.split('.')[-1]}" + return f"data:{mimetype};base64,{base64_encoded}" + + requests.post("http://localhost:7997/embeddings", + json={ + "model": "laion/larger_clap_general", + "encoding_format": "float", + "input": [ + url, url_to_base64(url, "audio") + ], + # set extra modality to audio to process as audio + "infinity_extra_modality": "audio" + } + ) + ``` + + ## Running via OpenAI Client + ```python + from openai import OpenAI # pip install openai==1.51.0 + client = OpenAI(base_url="http://localhost:7997/") + client.embeddings.create( + model="laion/larger_clap_general", + input=[url_to_base64(url, "audio")], + encoding_format= "base64", + extra_body={ + "infinity_extra_modality": "audio" + } + ) + + client.embeddings.create( + model="laion/larger_clap_general", + input=["the sound of a beep", "the sound of a cat"], + encoding_format= "base64", + extra_body={ + "infinity_extra_modality": "text" + } + ) + ``` + + ### Hint: Run all the above models on one server: + ```bash + infinity_emb v2
--model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general ``` """ - engine = _resolve_engine(data.model) - try: - if isinstance(data.input, str): - data.input = [data.input] + modality = data.root.infinity_extra_modality + data_root = data.root + engine = _resolve_engine(data_root.model) - logger.debug("[📝] Received request with %s inputs ", len(data.input)) + try: start = time.perf_counter() - - embedding, usage = await engine.embed(sentences=data.input) + if modality == Modality.text: + if isinstance(data_root.input, str): + input_ = [data_root.input] + else: + input_ = data_root.input # type: ignore + logger.debug( + "[📝] Received request with %s input texts ", + len(input_), # type: ignore + ) + embedding, usage = await engine.embed(sentences=input_) + elif modality == Modality.audio: + urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore + logger.debug( + "[📝] Received request with %s input audios ", + len(urls_or_bytes), # type: ignore + ) + embedding, usage = await engine.audio_embed(audios=urls_or_bytes) + elif modality == Modality.image: + urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore + logger.debug( + "[📝] Received request with %s input images ", + len(urls_or_bytes), # type: ignore + ) + embedding, usage = await engine.image_embed(images=urls_or_bytes) duration = (time.perf_counter() - start) * 1000 logger.debug("[✅] Done in %s ms", duration) @@ -250,12 +342,22 @@ async def _embeddings(data: OpenAIEmbeddingInput): return OpenAIEmbeddingResult.to_embeddings_response( embeddings=embedding, engine_args=engine.engine_args, - encoding_format=data.encoding_format, + encoding_format=data_root.encoding_format, usage=usage, ) except ModelNotDeployedError as ex: raise errors.OpenAIException( - f"ModelNotDeployedError: model=`{data.model}` does not support `embed`. Reason: {ex}", + f"ModelNotDeployedError: model=`{data_root.model}` does not support `embed` for modality `{modality.value}`. Reason: {ex}", + code=status.HTTP_400_BAD_REQUEST, + ) + except (ImageCorruption, AudioCorruption) as ex: + # get urls_or_bytes if not defined + try: + urls_or_bytes = urls_or_bytes + except NameError: + urls_or_bytes = [] + raise errors.OpenAIException( + f"{modality.value}Corruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except Exception as ex: @@ -368,6 +470,8 @@ async def _classify(data: ClassifyInput): response_class=responses.ORJSONResponse, dependencies=route_dependencies, operation_id="embeddings_image", + deprecated=True, + summary="Deprecated: Use `embeddings` with `infinity_extra_modality` set to `image`", ) async def _embeddings_image(data: ImageEmbeddingInput): """Encode Embeddings from Image files @@ -425,6 +529,8 @@ async def _embeddings_image(data: ImageEmbeddingInput): response_class=responses.ORJSONResponse, dependencies=route_dependencies, operation_id="embeddings_audio", + deprecated=True, + summary="Deprecated: Use `embeddings` with `infinity_extra_modality` set to `audio`", ) async def _embeddings_audio(data: AudioEmbeddingInput): """Encode Embeddings from Audio files @@ -799,7 +905,7 @@ def v2( def cli(): if len(sys.argv) == 1 or sys.argv[1] not in ["v1", "v2", "help", "--help"]: - for _ in range(9): + for _ in range(3): logger.error( "Error: No command given. Defaulting to `v1`. 
" "Relying on this side effect is considered an error and " diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py index 890f4800..00fe8682 100644 --- a/libs/infinity_emb/infinity_emb/primitives.py +++ b/libs/infinity_emb/infinity_emb/primitives.py @@ -402,3 +402,9 @@ class AudioCorruption(Exception): ModelCapabilites = Literal["embed", "rerank", "classify", "image_embed", "audio_embed"] + + +class Modality(str, enum.Enum): + text = "text" + audio = "audio" + image = "image" diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py index 999fea45..6dac01d6 100644 --- a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py @@ -24,14 +24,16 @@ async def resolve_audio( try: audio_bytes = io.BytesIO(audio) except Exception as e: - raise AudioCorruption(f"Error opening audio: {e}") + raise AudioCorruption(f"Error opening audio from bytes: {e}") else: try: downloaded = await (await session.get(audio)).read() - # downloaded = requests.get(audio, stream=True).content + # audio_bytes = io.BytesIO(downloaded) except Exception as e: - raise AudioCorruption(f"Error downloading audio.\nError msg: {str(e)}") + raise AudioCorruption( + f"Error downloading audio from {audio}. \nError msg: {str(e)}" + ) try: data, rate = sf.read(audio_bytes) diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index 869244bf..331bfcba 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -695,6 +695,17 @@ files = [ {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "einops" version = "0.8.0" @@ -757,22 +768,23 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.110.2" +version = "0.115.0" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.110.2-py3-none-any.whl", hash = "sha256:239403f2c0a3dda07a9420f95157a7f014ddb2b770acdbc984f9bdf3ead7afdb"}, - {file = "fastapi-0.110.2.tar.gz", hash = "sha256:b53d673652da3b65e8cd787ad214ec0fe303cad00d2b529b86ce7db13f17518d"}, + {file = "fastapi-0.115.0-py3-none-any.whl", hash = "sha256:17ea427674467486e997206a5ab25760f6b09e069f099b96f5b55a32fb6f1631"}, + {file = "fastapi-0.115.0.tar.gz", hash = "sha256:f93b4ca3529a8ebc6fc3fcf710e5efa8de3df9b41570958abf1d97d843138004"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.37.2,<0.38.0" +starlette = ">=0.37.2,<0.39.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", 
"uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "filelock" @@ -1250,6 +1262,76 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiter" +version = "0.5.0" +description = "Fast iterable JSON parser." +optional = false +python-versions = ">=3.8" +files = [ + {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"}, + {file = "jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e"}, + {file = "jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf"}, + {file = "jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9"}, + {file = 
"jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646"}, + {file = "jiter-0.5.0-cp311-none-win32.whl", hash = "sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb"}, + {file = "jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338"}, + {file = "jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4"}, + {file = "jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f04bc2fc50dc77be9d10f73fcc4e39346402ffe21726ff41028f36e179b587e6"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f433a4169ad22fcb550b11179bb2b4fd405de9b982601914ef448390b2954f3"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad4a6398c85d3a20067e6c69890ca01f68659da94d74c800298581724e426c7e"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6baa88334e7af3f4d7a5c66c3a63808e5efbc3698a1c57626541ddd22f8e4fbf"}, + {file = 
"jiter-0.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ece0a115c05efca597c6d938f88c9357c843f8c245dbbb53361a1c01afd7148"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:335942557162ad372cc367ffaf93217117401bf930483b4b3ebdb1223dbddfa7"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649b0ee97a6e6da174bffcb3c8c051a5935d7d4f2f52ea1583b5b3e7822fbf14"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4be354c5de82157886ca7f5925dbda369b77344b4b4adf2723079715f823989"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5206144578831a6de278a38896864ded4ed96af66e1e63ec5dd7f4a1fce38a3a"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8120c60f8121ac3d6f072b97ef0e71770cc72b3c23084c72c4189428b1b1d3b6"}, + {file = "jiter-0.5.0-cp38-none-win32.whl", hash = "sha256:6f1223f88b6d76b519cb033a4d3687ca157c272ec5d6015c322fc5b3074d8a5e"}, + {file = "jiter-0.5.0-cp38-none-win_amd64.whl", hash = "sha256:c59614b225d9f434ea8fc0d0bec51ef5fa8c83679afedc0433905994fb36d631"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0af3838cfb7e6afee3f00dc66fa24695199e20ba87df26e942820345b0afc566"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:550b11d669600dbc342364fd4adbe987f14d0bbedaf06feb1b983383dcc4b961"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:489875bf1a0ffb3cb38a727b01e6673f0f2e395b2aad3c9387f94187cb214bbf"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b250ca2594f5599ca82ba7e68785a669b352156260c5362ea1b4e04a0f3e2389"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ea18e01f785c6667ca15407cd6dabbe029d77474d53595a189bdc813347218e"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462a52be85b53cd9bffd94e2d788a09984274fe6cebb893d6287e1c296d50653"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cc68b48d50fa472c79c93965e19bd48f40f207cb557a8346daa020d6ba973b"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c834133e59a8521bc87ebcad773608c6fa6ab5c7a022df24a45030826cf10bc"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab3a71ff31cf2d45cb216dc37af522d335211f3a972d2fe14ea99073de6cb104"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cccd3af9c48ac500c95e1bcbc498020c87e1781ff0345dd371462d67b76643eb"}, + {file = "jiter-0.5.0-cp39-none-win32.whl", hash = "sha256:368084d8d5c4fc40ff7c3cc513c4f73e02c85f6009217922d0823a48ee7adf61"}, + {file = "jiter-0.5.0-cp39-none-win_amd64.whl", hash = "sha256:ce03f7b4129eb72f1687fa11300fbf677b02990618428934662406d2a76742a1"}, + {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, +] + [[package]] name = "joblib" version = "1.4.2" @@ -2061,6 +2143,30 @@ packaging = "*" protobuf = "*" sympy = "*" +[[package]] +name = "openai" +version = "1.51.0" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"}, + {file = 
"openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.11,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + [[package]] name = "optimum" version = "1.22.0" @@ -3359,7 +3465,7 @@ files = [ name = "setuptools" version = "75.1.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, @@ -3660,31 +3766,31 @@ files = [ [[package]] name = "torch" -version = "2.4.0" +version = "2.4.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" files = [ - {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, - {file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"}, - {file = "torch-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2497cbc7b3c951d69b276ca51fe01c2865db67040ac67f5fc20b03e41d16ea4a"}, - {file = "torch-2.4.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:685418ab93730efbee71528821ff54005596970dd497bf03c89204fb7e3f71de"}, - {file = "torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e743adadd8c8152bb8373543964551a7cb7cc20ba898dc8f9c0cdbe47c283de0"}, - {file = "torch-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7334325c0292cbd5c2eac085f449bf57d3690932eac37027e193ba775703c9e6"}, - {file = "torch-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:97730014da4c57ffacb3c09298c6ce05400606e890bd7a05008d13dd086e46b1"}, - {file = "torch-2.4.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f169b4ea6dc93b3a33319611fcc47dc1406e4dd539844dcbd2dec4c1b96e166d"}, - {file = "torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:997084a0f9784d2a89095a6dc67c7925e21bf25dea0b3d069b41195016ccfcbb"}, - {file = "torch-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc3988e8b36d1e8b998d143255d9408d8c75da4ab6dd0dcfd23b623dfb0f0f57"}, - {file = "torch-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:3374128bbf7e62cdaed6c237bfd39809fbcfaa576bee91e904706840c3f2195c"}, - {file = "torch-2.4.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:91aaf00bfe1ffa44dc5b52809d9a95129fca10212eca3ac26420eb11727c6288"}, - {file = "torch-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc30457ea5489c62747d3306438af00c606b509d78822a88f804202ba63111ed"}, - {file = "torch-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a046491aaf96d1215e65e1fa85911ef2ded6d49ea34c8df4d0638879f2402eef"}, - {file = "torch-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:688eec9240f3ce775f22e1e1a5ab9894f3d5fe60f3f586deb7dbd23a46a83916"}, - {file = "torch-2.4.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:3af4de2a618fb065e78404c4ba27a818a7b7957eaeff28c6c66ce7fb504b68b8"}, - {file = "torch-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:618808d3f610d5f180e47a697d4ec90b810953bb1e020f424b2ac7fb0884b545"}, - {file = "torch-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = 
"sha256:ed765d232d23566052ba83632ec73a4fccde00b4c94ad45d63b471b09d63b7a7"}, - {file = "torch-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2feb98ac470109472fb10dfef38622a7ee08482a16c357863ebc7bc7db7c8f7"}, - {file = "torch-2.4.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8940fc8b97a4c61fdb5d46a368f21f4a3a562a17879e932eb51a5ec62310cb31"}, + {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"}, + {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"}, + {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"}, + {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"}, + {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"}, + {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"}, + {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"}, + {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"}, + {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"}, + {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"}, + {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"}, + {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"}, + {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"}, + {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"}, + {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"}, + {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"}, + {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"}, + {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"}, + {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"}, + {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"}, ] [package.dependencies] @@ -3703,6 +3809,7 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \" nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = 
"12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = "*" sympy = "*" triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} typing-extensions = ">=4.8.0" @@ -3713,42 +3820,37 @@ optree = ["optree (>=0.11.0)"] [[package]] name = "torchvision" -version = "0.19.0" +version = "0.19.1" description = "image and video datasets and models for torch deep learning" optional = true python-versions = ">=3.8" files = [ - {file = "torchvision-0.19.0-1-cp310-cp310-win_amd64.whl", hash = "sha256:6ed066aae5c50465d7c4761357aefe5dbd2eb7075a33ab8c14b352fc2353ad4c"}, - {file = "torchvision-0.19.0-1-cp311-cp311-win_amd64.whl", hash = "sha256:6b1bce2e4c003d890a18f14ff289528707d918e38539ff890ef02aa31dae1b56"}, - {file = "torchvision-0.19.0-1-cp312-cp312-win_amd64.whl", hash = "sha256:13aee7a46e049c8c1e7d35a0394b0587a7e62ff3d1a822cd2bbbacb675ac4a09"}, - {file = "torchvision-0.19.0-1-cp38-cp38-win_amd64.whl", hash = "sha256:2acc436d043d4f81b3bc6929cbfa4ef1cdae4d8a0b04ec72ec30a497e9a38179"}, - {file = "torchvision-0.19.0-1-cp39-cp39-win_amd64.whl", hash = "sha256:b5f70f5a8bd9c8b00a076bf466b39b5cd679ef62587c47cc048adb04d9c5f155"}, - {file = "torchvision-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec874ef85dcb24c69e600f6e276af892c80cde3ffdaeb7275efda463242bc2a8"}, - {file = "torchvision-0.19.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:106842b1e475b14d9a04ee0d6f5477d43100e3bb78e9d31e37422384d0d84179"}, - {file = "torchvision-0.19.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d467d434005fd05a227a2ba7af4c591bb67e6d4a97bbd06eda8da83f43e9fd07"}, - {file = "torchvision-0.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:f77ac31f7337d0f6f4b58e65582c6c93b9d9eeec7dfd7478896b5cdc19a2d60d"}, - {file = "torchvision-0.19.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dbf3aa71a3899244fc884303ed3c4604a160824fefac77e82317a5463efc1d9b"}, - {file = "torchvision-0.19.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ec4162dc71d9db7f0b51d0f92491929c1419605ff436e1305e50de13504a1c30"}, - {file = "torchvision-0.19.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:4e6aa4fa3f0bc3599fa071c149e651a3e6bdd67c9161794478f9f91471c406a2"}, - {file = "torchvision-0.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac5525d5cc09e425b5cf5752ecf66eefbbbd8c8cd945198ce35eb01a694e6069"}, - {file = "torchvision-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c09ef8ed184fa877f6251b620226e74f682b8f1d6b341456428d4955b8d9c670"}, - {file = "torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:02f1dd5cfc897957535b41b0258ec452d30de044e20c2de2c75869f7708e7656"}, - {file = "torchvision-0.19.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:be0f27a28b8e9f2ae98a31af34a4bdd2a5bf154d92bd73a5797c8d2156fb3ab6"}, - {file = "torchvision-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6ba7756f75c80212e51d3576f85ea204589e0c16efdb9b835dd677bc8929a67"}, - {file = "torchvision-0.19.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:854e967a16a9409e941b5bbe5aa357b23f7158bccb9de35ae20fd4945f05ecd1"}, - {file = "torchvision-0.19.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d9afb8a3c3ce99a161a64c2a3b91cb545632a72118053cbfb84e87a02a8dcd02"}, - {file = "torchvision-0.19.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:079a696e0b2cb52e4be30afa8e9b3d7d280f02a2b5ffedd7e821fa1efd1a5a8d"}, - {file = "torchvision-0.19.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:aaa338ff3a55a8c0f94e0e64eff6fe2af1fc933a95fd43812760e72ea66e986b"}, - {file = "torchvision-0.19.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd1279571d4b68d5a53d9b7a35aedf91c4cb1e0b08099f6a1effa7b25b8c95e7"}, - {file = "torchvision-0.19.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d54b5e19b7ebebca7d0b08497b4c6335264cad04c94c05fa35988d9e9eed0c4"}, - {file = "torchvision-0.19.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5f9a598dcf82bdfc8e4436ce74763b3877dabec3b33f94613b94ede13e3e4dee"}, - {file = "torchvision-0.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:ec1281c10402234d470bfd4d53663d81f4364f293b2f8fe24d4a7a1adc78c90c"}, + {file = "torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04"}, + {file = "torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145"}, + {file = "torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8"}, + {file = "torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c"}, + {file = "torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9"}, + {file = "torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289"}, + {file = "torchvision-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4e4f5b24ea6b087b02ed492ab1e21bba3352c4577e2def14248cfc60732338"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9281d63ead929bb19143731154cd1d8bf0b5e9873dff8578a40e90a6bec3c6fa"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4d10bc9083c4d5fadd7edd7b729700a7be48dab4f62278df3bc73fa48e48a155"}, + {file = "torchvision-0.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ccf085ef1824fb9e16f1901285bf89c298c62dfd93267a39e8ee42c71255242f"}, + {file = "torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5"}, + {file = "torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614"}, ] [package.dependencies] numpy = "*" pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" -torch = "2.4.0" +torch = "2.4.1" [package.extras] gdown = ["gdown (>=4.7.3)"] @@ -3876,6 +3978,11 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, + {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, + {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, + {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, + {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, + {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -4661,4 +4768,4 @@ vision = ["pillow", "timm"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "81ba6bb614937cb06c75c15232bbba7af3797ec8e0936723411fad07bf211a48" +content-hash = "806fe858fc679b8842cb847d213b8607a887cdfe58c28bab1def8bfde0fbda05" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 55d9b61d..f9ba99cd 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -56,11 +56,12 @@ coverage = {extras = ["toml"], version = "^7.3.2"} mypy = "^1.5.1" requests = "2.28.1" types-requests = "2.28.1" +openai = "*" # 1.51.0 works # preferred dev dependencies -torch = "2.4.0" +torch = "2.4.1" prometheus-fastapi-instrumentator = "7.0.0" -fastapi = "0.110.2" +fastapi = "0.115.0" [tool.poetry.group.codespell.dependencies] codespell = "^2.2.0" diff --git a/libs/infinity_emb/tests/conftest.py b/libs/infinity_emb/tests/conftest.py index 9705161a..121f4b72 100644 --- a/libs/infinity_emb/tests/conftest.py +++ b/libs/infinity_emb/tests/conftest.py @@ -10,7 +10,7 @@ pytest.DEFAULT_RERANKER_MODEL = "mixedbread-ai/mxbai-rerank-xsmall-v1" pytest.DEFAULT_CLASSIFIER_MODEL = "SamLowe/roberta-base-go_emotions" pytest.DEFAULT_AUDIO_MODEL = "laion/clap-htsat-unfused" -pytest.DEFAULT_VISION_MODEL = "wkcn/TinyCLIP-ViT-8M-16-Text-3M-YFCC15M" +pytest.DEFAULT_IMAGE_MODEL = "wkcn/TinyCLIP-ViT-8M-16-Text-3M-YFCC15M" pytest.IMAGE_SAMPLE_URL = "https://github.com/michaelfeil/infinity/raw/06fd1f4d8f0a869f4482fc1c78b62a75ccbb66a1/docs/assets/cats_coco_sample.jpg" pytest.AUDIO_SAMPLE_URL = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" diff --git a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py index 99fc45c9..d6362958 100644 --- 
a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py +++ b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py @@ -6,7 +6,6 @@ import sys import time from unittest import TestCase -from uuid import uuid4 import numpy as np import pytest @@ -18,8 +17,8 @@ from infinity_emb.primitives import InferenceEngine PREFIX = "" -MODEL_NAME = str(uuid4()) -MODEL_NAME_2 = str(uuid4()) +MODEL_NAME = "dummy-number-1" +MODEL_NAME_2 = "dummy-number-2" BATCH_SIZE = 16 PATH_OPENAPI = pathlib.Path(__file__).parent.parent.parent.parent.parent.joinpath( @@ -67,22 +66,22 @@ async def test_model_route(client): assert "unix" in respnse_health.json() -@pytest.mark.parametrize("model_name", [MODEL_NAME, MODEL_NAME_2]) @pytest.mark.anyio -async def test_embedding_max_length(client, model_name): +async def test_embedding_max_length(client): # TOO long - input = "%_" * 4097 * 15 - response = await client.post( - f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) - ) - assert response.status_code == 422, f"{response.status_code}, {response.text}" - # works - input = "%_" * 4096 * 15 - response = await client.post( - f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) - ) - assert response.status_code == 200, f"{response.status_code}, {response.text}" - assert response.json()["model"] == model_name + for model_name in [MODEL_NAME, MODEL_NAME_2]: + input = "%_" * 4097 * 15 + response = await client.post( + f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) + ) + assert response.status_code == 422, f"{response.status_code}, {response.text}" + # works + input = "%_" * 4096 * 15 + response = await client.post( + f"{PREFIX}/embeddings", json=dict(input=input, model=model_name) + ) + assert response.status_code == 200, f"{response.status_code}, {response.text}" + assert response.json()["model"] == model_name @pytest.mark.parametrize("model_name", [MODEL_NAME]) diff --git a/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py new file mode 100644 index 00000000..0702f1ad --- /dev/null +++ b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py @@ -0,0 +1,160 @@ +# type: ignore + +import base64 + +import numpy as np +import pytest +import requests +from asgi_lifespan import LifespanManager +from httpx import AsyncClient +from openai import APIConnectionError, AsyncOpenAI +from sentence_transformers import SentenceTransformer # type: ignore + +from infinity_emb import create_server +from infinity_emb.args import EngineArgs + +PREFIX = "" +MODEL: str = ( + "michaelfeil/bge-small-en-v1.5" # pytest.DEFAULT_BERT_MODEL # type: ignore +) +baseurl = "http://openaidemo" +batch_size = 8 + +app = create_server( + url_prefix=PREFIX, + engine_args_list=[ + EngineArgs( + model_name_or_path=pytest.DEFAULT_AUDIO_MODEL, + batch_size=batch_size, + ), + EngineArgs( + model_name_or_path=pytest.DEFAULT_IMAGE_MODEL, + batch_size=batch_size, + ), + EngineArgs( + model_name_or_path=pytest.DEFAULT_BERT_MODEL, + batch_size=batch_size, + ), + ], + api_key="some_dummy_key", +) + + +@pytest.fixture +def model_base() -> SentenceTransformer: + return SentenceTransformer(MODEL) + + +@pytest.fixture() +async def client(): + async with AsyncClient( + app=app, base_url=baseurl, timeout=20 + ) as client, LifespanManager(app): + yield client + + +def url_to_base64(url, modality="image"): + """small helper to convert url to base64 without server requiring access to the url""" + response = requests.get(url) + 
response.raise_for_status() + base64_encoded = base64.b64encode(response.content).decode("utf-8") + mimetype = f"{modality}/{url.split('.')[-1]}" + return f"data:{mimetype};base64,{base64_encoded}" + + +@pytest.mark.anyio +async def test_openai(client: AsyncClient): + client_oai = AsyncOpenAI( + api_key="some_dummy_key", base_url=baseurl, http_client=client + ) + + async with client_oai: + # test audio + emb1_audio_from_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[ + "the sound of a beep", + "the sound of a cat", + "the sound of a dog", + "the sound of a bird", + ], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + emb1_audio = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[url_to_base64(pytest.AUDIO_SAMPLE_URL, "audio")], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) + emb1_1_audio = await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[pytest.AUDIO_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) + # test: image + emb_1_image_from_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=["a cat", "a dog", "a bird"], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + emb_1_image = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=[url_to_base64(pytest.IMAGE_SAMPLE_URL, "image")], # image is a cat + encoding_format="float", + extra_body={"infinity_extra_modality": "image"}, + ) + emb_1_1_image = await client_oai.embeddings.create( + model=pytest.DEFAULT_IMAGE_MODEL, + input=[pytest.IMAGE_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "image"}, + ) + + # test: text + emb_1_text = await client_oai.embeddings.create( + model=pytest.DEFAULT_BERT_MODEL, + input=["a cat", "a cat", "a bird"], + encoding_format="float", + extra_body={"infinity_extra_modality": "text"}, + ) + + # test AUDIO: cosine distance of beep to cat and dog + np.testing.assert_allclose( + emb1_audio.data[0].embedding, emb1_1_audio.data[0].embedding, rtol=1e-5 + ) + assert all( + np.dot(emb1_audio.data[0].embedding, emb1_audio_from_text.data[0].embedding) + > np.dot(emb1_audio.data[0].embedding, emb1_audio_from_text.data[i].embedding) + for i in range(1, 4) + ) + + # test IMAGE: cosine distance of cat to dog and bird + np.testing.assert_allclose( + emb_1_image.data[0].embedding, emb_1_1_image.data[0].embedding, rtol=1e-5 + ) + assert all( + np.dot(emb_1_image.data[0].embedding, emb_1_image_from_text.data[0].embedding) + > np.dot(emb_1_image.data[0].embedding, emb_1_image_from_text.data[i].embedding) + for i in range(1, 3) + ) + + # test TEXT: cosine distance of cat to dog and bird + np.testing.assert_allclose( + emb_1_text.data[0].embedding, emb_1_text.data[1].embedding, rtol=1e-5 + ) + + # wrong key + with pytest.raises(APIConnectionError): + client_oai = AsyncOpenAI( + api_key="some_wrong", base_url=baseurl, http_client=client + ) + async with client_oai: + await client_oai.embeddings.create( + model=pytest.DEFAULT_AUDIO_MODEL, + input=[pytest.AUDIO_SAMPLE_URL], + encoding_format="float", + extra_body={"infinity_extra_modality": "audio"}, + ) diff --git a/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py b/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py index 18f71d61..a93ffbb9 100644 --- a/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py +++ 
b/libs/infinity_emb/tests/end_to_end/test_optimum_embedding.py @@ -8,9 +8,7 @@ from infinity_emb.primitives import Device, InferenceEngine PREFIX = "/v1_optimum" -MODEL: str = ( - "michaelfeil/bge-small-en-v1.5" # pytest.DEFAULT_BERT_MODEL # type: ignore -) +MODEL: str = pytest.DEFAULT_BERT_MODEL # type: ignore batch_size = 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py b/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py index a1a86974..5170b347 100644 --- a/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py +++ b/libs/infinity_emb/tests/end_to_end/test_sentence_transformers.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_sentence_transformers" MODEL: str = pytest.DEFAULT_BERT_MODEL # type: ignore[assignment] batch_size = 64 if torch.cuda.is_available() else 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 120e7c63..9d4b5a82 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_audio" MODEL: str = pytest.DEFAULT_AUDIO_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 @@ -116,35 +116,45 @@ async def test_meta(client, helpers): @pytest.mark.anyio -@pytest.mark.parametrize("no_of_audios", [1, 5, 10]) -async def test_audio_multiple(client, no_of_audios): - audio_urls = [ - "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" - ] * no_of_audios - - response = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_urls}, - ) - assert response.status_code == 200 - rdata = response.json() - rdata_results = rdata["data"] - assert len(rdata_results) == no_of_audios - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 +async def test_audio_multiple(client): + for route in [f"{PREFIX}/embeddings_audio", f"{PREFIX}/embeddings"]: + for no_of_audios in [1, 5, 10]: + audio_urls = [ + "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + ] * no_of_audios + + response = await client.post( + route, + json={ + "model": MODEL, + "input": audio_urls, + "infinity_extra_modality": "audio", + }, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert len(rdata_results) == no_of_audios + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio async def test_audio_fail(client): - audio_url = "https://www.google.com/404" - - response = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_url}, - ) - assert response.status_code == status.HTTP_400_BAD_REQUEST + for route in [f"{PREFIX}/embeddings_audio", f"{PREFIX}/embeddings"]: + audio_url = "https://www.google.com/404" + + response = await client.post( + route, + json={ + "model": MODEL, + "input": audio_url, + "infinity_extra_modality": "audio", + }, + ) + assert 
response.status_code == status.HTTP_400_BAD_REQUEST @pytest.mark.anyio @@ -152,8 +162,12 @@ async def test_audio_empty(client): audio_url_empty = [] response_empty = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": audio_url_empty}, + f"{PREFIX}/embeddings", + json={ + "model": MODEL, + "input": audio_url_empty, + "infinity_extra_modality": "audio", + }, ) assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py b/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py index 3b53aadf..fe1667b0 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_reranker.py @@ -8,7 +8,7 @@ from infinity_emb.args import EngineArgs from infinity_emb.primitives import Device, InferenceEngine -PREFIX = "/v1_ct2" +PREFIX = "/v1_reranker" MODEL: str = pytest.DEFAULT_RERANKER_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py index 0ca79034..0735b61e 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -13,7 +13,7 @@ from infinity_emb.primitives import Device, InferenceEngine PREFIX = "/v1_vision" -MODEL: str = pytest.DEFAULT_VISION_MODEL # type: ignore[assignment] +MODEL: str = pytest.DEFAULT_IMAGE_MODEL # type: ignore[assignment] batch_size = 32 if torch.cuda.is_available() else 8 app = create_server( @@ -113,60 +113,70 @@ async def test_vision_base64(client): @pytest.mark.anyio async def test_meta(client, helpers): - image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" - - text_input = ["a cat", "a car", "a fridge"] - image_input = [image_url] - response_text = await client.post( - f"{PREFIX}/embeddings", - json={"model": MODEL, "input": text_input}, - ) - response_image = await client.post( - f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": image_input}, - ) + for route in [f"{PREFIX}/embeddings_image", f"{PREFIX}/embeddings"]: + image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" + + text_input = ["a cat", "a car", "a fridge"] + image_input = [image_url] + response_text = await client.post( + f"{PREFIX}/embeddings", + json={"model": MODEL, "input": text_input}, + ) + response_image = await client.post( + route, + json={ + "model": MODEL, + "input": image_input, + "infinity_extra_modality": "image", + }, + ) - assert response_text.status_code == 200 - assert response_image.status_code == 200 + assert response_text.status_code == 200 + assert response_image.status_code == 200 - rdata_text = response_text.json() - rdata_results_text = rdata_text["data"] + rdata_text = response_text.json() + rdata_results_text = rdata_text["data"] - rdata_image = response_image.json() - rdata_results_image = rdata_image["data"] + rdata_image = response_image.json() + rdata_results_image = rdata_image["data"] - embeddings_image_cat = rdata_results_image[0]["embedding"] - embeddings_text_cat = rdata_results_text[0]["embedding"] - embeddings_text_car = rdata_results_text[1]["embedding"] - embeddings_text_fridge = rdata_results_text[2]["embedding"] + embeddings_image_cat = rdata_results_image[0]["embedding"] + embeddings_text_cat = rdata_results_text[0]["embedding"] + embeddings_text_car = rdata_results_text[1]["embedding"] + embeddings_text_fridge = 
rdata_results_text[2]["embedding"] - assert helpers.cosine_similarity( - embeddings_image_cat, embeddings_text_cat - ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_car) - assert helpers.cosine_similarity( - embeddings_image_cat, embeddings_text_cat - ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_fridge) + assert helpers.cosine_similarity( + embeddings_image_cat, embeddings_text_cat + ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_car) + assert helpers.cosine_similarity( + embeddings_image_cat, embeddings_text_cat + ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_fridge) @pytest.mark.anyio -@pytest.mark.parametrize("no_of_images", [1, 5, 10]) -async def test_vision_multiple(client, no_of_images): - image_urls = [ - pytest.IMAGE_SAMPLE_URL, - ] * no_of_images - - response = await client.post( - f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": image_urls}, - ) - assert response.status_code == 200 - rdata = response.json() - rdata_results = rdata["data"] - assert len(rdata_results) == no_of_images - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 +async def test_vision_multiple(client): + for route in [f"{PREFIX}/embeddings_image", f"{PREFIX}/embeddings"]: + for no_of_images in [1, 5, 10]: + image_urls = [ + pytest.IMAGE_SAMPLE_URL, + ] * no_of_images + + response = await client.post( + route, + json={ + "model": MODEL, + "input": image_urls, + "infinity_extra_modality": "image", + }, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert len(rdata_results) == no_of_images + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio