Skip to content

Commit

Permalink
* chore(docs): update Getting Started.ipynb with progressbar + New Models
Browse files Browse the repository at this point in the history

* * feat(Supported_Models.ipynb): add support for BAAI/bge-small-zh-v1.5 model
* feat(Supported_Models.ipynb): add support for jinaai/jina-embeddings-v2-base-en model
* feat(Supported_Models.ip

* * chore(docs): update Getting Started.ipynb with progressbar
  • Loading branch information
NirantK authored Dec 13, 2023
1 parent ea85e74 commit 5537953
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 27 deletions.
33 changes: 28 additions & 5 deletions docs/Getting Started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,27 @@
"id": "b61c6552",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 76.7M/76.7M [00:05<00:00, 15.0MiB/s]\n",
"100%|██████████| 3/3 [00:00<00:00, 455.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(384,)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
Expand Down Expand Up @@ -112,7 +127,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "145a56ce",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -144,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "272c8915",
"metadata": {},
"outputs": [],
Expand All @@ -164,10 +179,18 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "8013eee9",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 4/4 [00:00<00:00, 361.82it/s]\n"
]
}
],
"source": [
"embeddings: List[np.ndarray] = list(embedding_model.embed(documents))"
]
Expand All @@ -182,7 +205,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "0d8c8e08",
"metadata": {},
"outputs": [
Expand Down
67 changes: 45 additions & 22 deletions docs/examples/Supported_Models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,53 @@
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>BAAI/bge-small-zh-v1.5</td>\n",
" <td>512</td>\n",
" <td>Fast and recommended Chinese model</td>\n",
" <td>0.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BAAI/bge-base-en</td>\n",
" <td>768</td>\n",
" <td>Base English model</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <td>BAAI/bge-base-en-v1.5</td>\n",
" <td>768</td>\n",
" <td>Base English model, v1.5</td>\n",
" <td>0.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
" <td>384</td>\n",
" <td>Sentence Transformer model, MiniLM-L6-v2</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <td>intfloat/multilingual-e5-large</td>\n",
" <td>1024</td>\n",
" <td>Multilingual model, e5-large. Recommend using this model for non-English languages</td>\n",
" <td>2.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>jinaai/jina-embeddings-v2-base-en</td>\n",
" <td>768</td>\n",
" <td>English embedding model supporting 8192 sequence length</td>\n",
" <td>0.55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>jinaai/jina-embeddings-v2-small-en</td>\n",
" <td>512</td>\n",
" <td>English embedding model supporting 8192 sequence length</td>\n",
" <td>0.13</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
Expand All @@ -83,26 +104,35 @@
" model dim \\\n",
"0 BAAI/bge-small-en 384 \n",
"1 BAAI/bge-small-en-v1.5 384 \n",
"2 BAAI/bge-base-en 768 \n",
"3 BAAI/bge-base-en-v1.5 768 \n",
"4 sentence-transformers/all-MiniLM-L6-v2 384 \n",
"5 intfloat/multilingual-e5-large 1024 \n",
"2 BAAI/bge-small-zh-v1.5 512 \n",
"3 BAAI/bge-base-en 768 \n",
"4 BAAI/bge-base-en-v1.5 768 \n",
"5 sentence-transformers/all-MiniLM-L6-v2 384 \n",
"6 intfloat/multilingual-e5-large 1024 \n",
"7 jinaai/jina-embeddings-v2-base-en 768 \n",
"8 jinaai/jina-embeddings-v2-small-en 512 \n",
"\n",
" description \\\n",
"0 Fast English model \n",
"1 Fast and Default English model \n",
"2 Base English model \n",
"3 Base English model, v1.5 \n",
"4 Sentence Transformer model, MiniLM-L6-v2 \n",
"5 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"2 Fast and recommended Chinese model \n",
"3 Base English model \n",
"4 Base English model, v1.5 \n",
"5 Sentence Transformer model, MiniLM-L6-v2 \n",
"6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"7 English embedding model supporting 8192 sequence length \n",
"8 English embedding model supporting 8192 sequence length \n",
"\n",
" size_in_GB \n",
"0 0.20 \n",
"1 0.13 \n",
"2 0.50 \n",
"3 0.44 \n",
"4 0.09 \n",
"5 2.24 "
"2 0.10 \n",
"3 0.50 \n",
"4 0.44 \n",
"5 0.09 \n",
"6 2.24 \n",
"7 0.55 \n",
"8 0.13 "
]
},
"execution_count": 1,
Expand All @@ -119,13 +149,6 @@
"pd.set_option('display.max_colwidth', None)\n",
"pd.DataFrame(Embedding.list_supported_models())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 5537953

Please sign in to comment.