Skip to content

Commit

Permalink
* feat(Supported_Models.ipynb): add new supported model to table
Browse files Browse the repository at this point in the history
  • Loading branch information
NirantK committed Feb 15, 2024
1 parent f213ca1 commit 08bf100
Showing 1 changed file with 41 additions and 20 deletions.
61 changes: 41 additions & 20 deletions docs/examples/Supported_Models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,18 @@
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"\u001b[32m2024-02-15 12:52:09.386\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mfastembed.embedding\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[33m\u001b[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated. Use TextEmbedding instead.\u001b[0m\n"
]
},
{
"data": {
"text/html": [
Expand Down Expand Up @@ -76,30 +85,38 @@
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>nomic-ai/nomic-embed-text-v1</td>\n",
" <td>768</td>\n",
" <td>8192 context length english model</td>\n",
" <td>0.54</td>\n",
" <td>{'hf': 'nomic-ai/nomic-embed-text-v1'}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>intfloat/multilingual-e5-large</td>\n",
" <td>1024</td>\n",
" <td>Multilingual model, e5-large. Recommend using this model for non-English languages</td>\n",
" <td>2.24</td>\n",
" <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <td>sentence-transformers/paraphrase-multilingual-mpnet-base-v2</td>\n",
" <td>768</td>\n",
" <td>Sentence-transformers model for tasks like clustering or semantic search</td>\n",
" <td>1.11</td>\n",
" <td>{'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <td>jinaai/jina-embeddings-v2-base-en</td>\n",
" <td>768</td>\n",
" <td>English embedding model supporting 8192 sequence length</td>\n",
" <td>0.55</td>\n",
" <td>{'hf': 'xenova/jina-embeddings-v2-base-en'}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <td>jinaai/jina-embeddings-v2-small-en</td>\n",
" <td>512</td>\n",
" <td>English embedding model supporting 8192 sequence length</td>\n",
Expand All @@ -117,46 +134,50 @@
"2 BAAI/bge-small-en-v1.5 384 \n",
"3 BAAI/bge-small-zh-v1.5 512 \n",
"4 sentence-transformers/all-MiniLM-L6-v2 384 \n",
"5 intfloat/multilingual-e5-large 1024 \n",
"6 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
"7 jinaai/jina-embeddings-v2-base-en 768 \n",
"8 jinaai/jina-embeddings-v2-small-en 512 \n",
"5 nomic-ai/nomic-embed-text-v1 768 \n",
"6 intfloat/multilingual-e5-large 1024 \n",
"7 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
"8 jinaai/jina-embeddings-v2-base-en 768 \n",
"9 jinaai/jina-embeddings-v2-small-en 512 \n",
"\n",
" description \\\n",
"0 Base English model, v1.5 \n",
"1 Large English model, v1.5 \n",
"2 Fast and Default English model \n",
"3 Fast and recommended Chinese model \n",
"4 Sentence Transformer model, MiniLM-L6-v2 \n",
"5 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"6 Sentence-transformers model for tasks like clustering or semantic search \n",
"7 English embedding model supporting 8192 sequence length \n",
"5 8192 context length english model \n",
"6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"7 Sentence-transformers model for tasks like clustering or semantic search \n",
"8 English embedding model supporting 8192 sequence length \n",
"9 English embedding model supporting 8192 sequence length \n",
"\n",
" size_in_GB \\\n",
"0 0.44 \n",
"1 1.34 \n",
"2 0.13 \n",
"3 0.10 \n",
"4 0.09 \n",
"5 2.24 \n",
"6 1.11 \n",
"7 0.55 \n",
"8 0.13 \n",
"5 0.54 \n",
"6 2.24 \n",
"7 1.11 \n",
"8 0.55 \n",
"9 0.13 \n",
"\n",
" sources \n",
"0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n",
"1 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n",
"2 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
"3 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
"4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
"5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
"6 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
"7 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
"8 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
"5 {'hf': 'nomic-ai/nomic-embed-text-v1'} \n",
"6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
"7 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
"8 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
"9 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
]
},
"execution_count": 4,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 08bf100

Please sign in to comment.