diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
index b6e082de..c6a9cd49 100644
--- a/fastembed/image/onnx_embedding.py
+++ b/fastembed/image/onnx_embedding.py
@@ -12,7 +12,7 @@
     {
         "model": "Qdrant/clip-ViT-B-32-vision",
         "dim": 512,
-        "description": "CLIP vision encoder based on ViT-B/32",
+        "description": "Image embeddings, Multimodal (text&image), 2021 year",
         "size_in_GB": 0.34,
         "sources": {
             "hf": "Qdrant/clip-ViT-B-32-vision",
@@ -22,7 +22,7 @@
     {
         "model": "Qdrant/resnet50-onnx",
         "dim": 2048,
-        "description": "ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/abs/1512.03385>`__.",
+        "description": "Image embeddings, Unimodal (image), 2016 year",
         "size_in_GB": 0.1,
         "sources": {
             "hf": "Qdrant/resnet50-onnx",
@@ -32,7 +32,7 @@
     {
         "model": "Qdrant/Unicom-ViT-B-16",
         "dim": 768,
-        "description": "Unicom Unicom-ViT-B-16 from open-metric-learning",
+        "description": "Image embeddings (more detailed than Unicom-ViT-B-32), Multimodal (text&image), 2023 year",
         "size_in_GB": 0.82,
         "sources": {
             "hf": "Qdrant/Unicom-ViT-B-16",
@@ -42,7 +42,7 @@
     {
         "model": "Qdrant/Unicom-ViT-B-32",
         "dim": 512,
-        "description": "Unicom Unicom-ViT-B-32 from open-metric-learning",
+        "description": "Image embeddings, Multimodal (text&image), 2023 year",
         "size_in_GB": 0.48,
         "sources": {
             "hf": "Qdrant/Unicom-ViT-B-32",
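
For context on how these "description" strings surface to users: a minimal sketch of looking up and running one of the models touched by this diff, assuming fastembed's public ImageEmbedding entrypoint; "example.jpg" is a hypothetical local file.

    # Sketch only; assumes the ImageEmbedding API exported by the fastembed package.
    from fastembed import ImageEmbedding

    # The descriptions edited in this diff are what list_supported_models() reports.
    for entry in ImageEmbedding.list_supported_models():
        print(entry["model"], "-", entry["description"])

    # Any "model" value from the registry above can be loaded by name.
    model = ImageEmbedding(model_name="Qdrant/clip-ViT-B-32-vision")

    # embed() takes image paths and yields numpy vectors of the advertised "dim"
    # (512 for clip-ViT-B-32-vision); "example.jpg" is a placeholder path.
    embeddings = list(model.embed(["example.jpg"]))
    print(embeddings[0].shape)  # expected: (512,)

The new description format ("Image embeddings, Multimodal (text&image), <year> year") matches the convention already used by fastembed's text model registry, so this listing reads uniformly across modalities.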