diff --git a/tutorials/tutorial-entity-linking-in-progress.ipynb b/tutorials/tutorial-entity-linking-in-progress.ipynb
index 7ae90f8..fc92d87 100644
--- a/tutorials/tutorial-entity-linking-in-progress.ipynb
+++ b/tutorials/tutorial-entity-linking-in-progress.ipynb
@@ -6,9 +6,9 @@
"id": "w8g9eAcFXPPh"
},
"source": [
- "# A work in progress notebook for entity linking\n",
+ "# IR Lab Tutorial (Research Oriented): Entity Linking For Query Interpretation\n",
"\n",
- "(Submission is currently in progress, looks like we have to lowercase all queries before linking the entities, currently discussing this with Marcel)"
+ "This tutorial shows how to load and re-use the entity linkings and derived query interpretations that are pre-computed in [TIREx](https://www.tira.io/tirex). Please have a look at the [corresponding paper](https://webis.de/publications.html?q=entity#kasturia_2022) for details."
]
},
{
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -37,7 +37,17 @@
"id": "kP6nwrlJSSUw",
"outputId": "14b6a45d-5b30-4a74-eb1a-ef60a47294e6"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n",
+ "\n",
+ "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n"
+ ]
+ }
+ ],
"source": [
"import pyterrier as pt\n",
"\n",
@@ -50,7 +60,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -67,7 +77,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -110,35 +120,35 @@
"
301 | \n",
" International Organized Crime | \n",
" {'query_id': '301', 'title': 'International Or... | \n",
- " [] | \n",
+ " [{'begin': 14, 'end': 29, 'mention': 'organize... | \n",
" \n",
" \n",
" 1 | \n",
" 302 | \n",
" Poliomyelitis and Post-Polio | \n",
" {'query_id': '302', 'title': 'Poliomyelitis an... | \n",
- " [] | \n",
+ " [{'begin': 0, 'end': 5, 'mention': 'polio', 'u... | \n",
"
\n",
" \n",
" 2 | \n",
" 303 | \n",
" Hubble Telescope Achievements | \n",
" {'query_id': '303', 'title': 'Hubble Telescope... | \n",
- " [] | \n",
+ " [{'begin': 7, 'end': 16, 'mention': 'telescope... | \n",
"
\n",
" \n",
" 3 | \n",
" 304 | \n",
" Endangered Species (Mammals) | \n",
" {'query_id': '304', 'title': 'Endangered Speci... | \n",
- " [] | \n",
+ " [{'begin': 11, 'end': 18, 'mention': 'species'... | \n",
"
\n",
" \n",
" 4 | \n",
" 305 | \n",
" Most Dangerous Vehicles | \n",
" {'query_id': '305', 'title': 'Most Dangerous V... | \n",
- " [] | \n",
+ " [{'begin': 0, 'end': 4, 'mention': 'most', 'ur... | \n",
"
\n",
" \n",
" ... | \n",
@@ -166,7 +176,7 @@
" 698 | \n",
" literacy rates Africa | \n",
" {'query_id': '698', 'title': 'literacy rates A... | \n",
- " [{'begin': 0, 'end': 8, 'mention': 'literacy',... | \n",
+ " [{'begin': 15, 'end': 21, 'mention': 'africa',... | \n",
"
\n",
" \n",
" 248 | \n",
@@ -215,22 +225,22 @@
"249 {'query_id': '700', 'title': 'gasoline tax U.S... \n",
"\n",
" entities \n",
- "0 [] \n",
- "1 [] \n",
- "2 [] \n",
- "3 [] \n",
- "4 [] \n",
+ "0 [{'begin': 14, 'end': 29, 'mention': 'organize... \n",
+ "1 [{'begin': 0, 'end': 5, 'mention': 'polio', 'u... \n",
+ "2 [{'begin': 7, 'end': 16, 'mention': 'telescope... \n",
+ "3 [{'begin': 11, 'end': 18, 'mention': 'species'... \n",
+ "4 [{'begin': 0, 'end': 4, 'mention': 'most', 'ur... \n",
".. ... \n",
"245 [{'begin': 7, 'end': 22, 'mention': 'plastic s... \n",
"246 [{'begin': 0, 'end': 22, 'mention': 'air traff... \n",
- "247 [{'begin': 0, 'end': 8, 'mention': 'literacy',... \n",
+ "247 [{'begin': 15, 'end': 21, 'mention': 'africa',... \n",
"248 [{'begin': 0, 'end': 4, 'mention': 'term', 'ur... \n",
"249 [{'begin': 0, 'end': 8, 'mention': 'gasoline',... \n",
"\n",
"[250 rows x 4 columns]"
]
},
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -242,7 +252,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -260,7 +270,12 @@
" 'title': 'literacy rates Africa',\n",
" 'description': 'What are literacy rates in African countries?',\n",
" 'narrative': 'A relevant document will contain information about the\\nliteracy rate in an African country.\\nGeneral education levels that do not specifically include literacy rates\\nare not relevant.'},\n",
- " 'entities': [{'begin': 0,\n",
+ " 'entities': [{'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa',\n",
+ " 'score': 0.904072542157174},\n",
+ " {'begin': 0,\n",
" 'end': 8,\n",
" 'mention': 'literacy',\n",
" 'url': 'https://en.wikipedia.org/wiki/Literacy',\n",
@@ -270,14 +285,99 @@
" 'mention': 'literacy rates',\n",
" 'url': 'https://en.wikipedia.org/wiki/List_of_countries_by_literacy_rate',\n",
" 'score': 0.09090909090909001},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Roman_province)',\n",
+ " 'score': 0.016743557111040003},\n",
" {'begin': 9,\n",
" 'end': 14,\n",
" 'mention': 'rates',\n",
" 'url': 'https://en.wikipedia.org/wiki/Rates_(Póvoa_de_Varzim)',\n",
- " 'score': 0.012711864406779001}]}"
+ " 'score': 0.012711864406779001},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Toto_song)',\n",
+ " 'score': 0.00620426344257},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(TV_series)',\n",
+ " 'score': 0.001073814826598},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Petrarch)',\n",
+ " 'score': 0.0008749602290804964},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(journal)',\n",
+ " 'score': 0.0008749602290804964},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Karl_Wolf_song)',\n",
+ " 'score': 0.0007556474705695197},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Pharoah_Sanders_album)',\n",
+ " 'score': 0.0003181673560292714},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Perpetuum_Jazzile_album)',\n",
+ " 'score': 0.00019885459751829463},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Rose_Laurens_song)',\n",
+ " 'score': 0.00019885459751829463},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_in_Africa',\n",
+ " 'score': 0.00015908367801463572},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa,_Ohio',\n",
+ " 'score': 0.0001193127585109767},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa,_Indiana',\n",
+ " 'score': 0.0001193127585109767},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(Miriam_Makeba_album)',\n",
+ " 'score': 0.0001193127585109767},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(William_Billings)',\n",
+ " 'score': 0.0001193127585109767},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Africa_(film)',\n",
+ " 'score': 0.0001193127585109767},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Ifriqiya',\n",
+ " 'score': 7.954183900731785e-05},\n",
+ " {'begin': 15,\n",
+ " 'end': 21,\n",
+ " 'mention': 'africa',\n",
+ " 'url': 'https://en.wikipedia.org/wiki/Afrika_(video_game)',\n",
+ " 'score': 7.954183900731785e-05}]}"
]
},
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -288,7 +388,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -363,7 +463,7 @@
" 'score': 0.0003222687721559781}]}"
]
},
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -381,39 +481,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Download from the Incubator: https://files.webis.de/data-in-production/data-research/tira-zenodo-dump-preparation/query-processors-in-progress/marcel-gohsen-query-interpretation-trec-core.zip\n",
- "\tThis is only used for last spot checks before archival to Zenodo.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Download: 100%|██████████| 98.5k/98.5k [00:00<00:00, 1.92MiB/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Download finished. Extract...\n",
- "Extraction finished: /root/.tira/extracted_runs/ir-benchmarks/disks45-nocr-trec-robust-2004-20230209-training/marcel-gohsen\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
{
"data": {
"text/html": [
@@ -567,7 +637,7 @@
"[250 rows x 4 columns]"
]
},
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -579,7 +649,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -615,7 +685,7 @@
" 'score': 0.6129202512020191}]}"
]
},
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -626,7 +696,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -646,7 +716,7 @@
" 'score': 0.106382978723404}]}"
]
},
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}