Skip to content

Commit

Permalink
rename approach-ids
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Mar 20, 2024
1 parent 01e9689 commit 6911268
Showing 1 changed file with 131 additions and 61 deletions.
192 changes: 131 additions & 61 deletions tutorials/tutorial-entity-linking-in-progress.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
"id": "w8g9eAcFXPPh"
},
"source": [
"# A work in progress notebook for entity linking\n",
"# IR Lab Tutorial (Research Oriented): Entity Linking For Query Interpretation\n",
"\n",
"(Submission is currently in progress, looks like we have to lowercase all queries before linking the entities, currently discussing this with Marcel)"
"This tutorial shows how to re-use / load entity linkings and derived query interpretations pre-computed in [TIREx](https://www.tira.io/tirex). Please have a look at the [corresponding paper](https://webis.de/publications.html?q=entity#kasturia_2022) for details."
]
},
{
Expand All @@ -29,15 +29,25 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kP6nwrlJSSUw",
"outputId": "14b6a45d-5b30-4a74-eb1a-ef60a47294e6"
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n",
"\n",
"No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n"
]
}
],
"source": [
"import pyterrier as pt\n",
"\n",
Expand All @@ -50,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -67,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand Down Expand Up @@ -110,35 +120,35 @@
" <td>301</td>\n",
" <td>International Organized Crime</td>\n",
" <td>{'query_id': '301', 'title': 'International Or...</td>\n",
" <td>[]</td>\n",
" <td>[{'begin': 14, 'end': 29, 'mention': 'organize...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>302</td>\n",
" <td>Poliomyelitis and Post-Polio</td>\n",
" <td>{'query_id': '302', 'title': 'Poliomyelitis an...</td>\n",
" <td>[]</td>\n",
" <td>[{'begin': 0, 'end': 5, 'mention': 'polio', 'u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>303</td>\n",
" <td>Hubble Telescope Achievements</td>\n",
" <td>{'query_id': '303', 'title': 'Hubble Telescope...</td>\n",
" <td>[]</td>\n",
" <td>[{'begin': 7, 'end': 16, 'mention': 'telescope...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>304</td>\n",
" <td>Endangered Species (Mammals)</td>\n",
" <td>{'query_id': '304', 'title': 'Endangered Speci...</td>\n",
" <td>[]</td>\n",
" <td>[{'begin': 11, 'end': 18, 'mention': 'species'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>305</td>\n",
" <td>Most Dangerous Vehicles</td>\n",
" <td>{'query_id': '305', 'title': 'Most Dangerous V...</td>\n",
" <td>[]</td>\n",
" <td>[{'begin': 0, 'end': 4, 'mention': 'most', 'ur...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
Expand Down Expand Up @@ -166,7 +176,7 @@
" <td>698</td>\n",
" <td>literacy rates Africa</td>\n",
" <td>{'query_id': '698', 'title': 'literacy rates A...</td>\n",
" <td>[{'begin': 0, 'end': 8, 'mention': 'literacy',...</td>\n",
" <td>[{'begin': 15, 'end': 21, 'mention': 'africa',...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>248</th>\n",
Expand Down Expand Up @@ -215,22 +225,22 @@
"249 {'query_id': '700', 'title': 'gasoline tax U.S... \n",
"\n",
" entities \n",
"0 [] \n",
"1 [] \n",
"2 [] \n",
"3 [] \n",
"4 [] \n",
"0 [{'begin': 14, 'end': 29, 'mention': 'organize... \n",
"1 [{'begin': 0, 'end': 5, 'mention': 'polio', 'u... \n",
"2 [{'begin': 7, 'end': 16, 'mention': 'telescope... \n",
"3 [{'begin': 11, 'end': 18, 'mention': 'species'... \n",
"4 [{'begin': 0, 'end': 4, 'mention': 'most', 'ur... \n",
".. ... \n",
"245 [{'begin': 7, 'end': 22, 'mention': 'plastic s... \n",
"246 [{'begin': 0, 'end': 22, 'mention': 'air traff... \n",
"247 [{'begin': 0, 'end': 8, 'mention': 'literacy',... \n",
"247 [{'begin': 15, 'end': 21, 'mention': 'africa',... \n",
"248 [{'begin': 0, 'end': 4, 'mention': 'term', 'ur... \n",
"249 [{'begin': 0, 'end': 8, 'mention': 'gasoline',... \n",
"\n",
"[250 rows x 4 columns]"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -242,7 +252,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand All @@ -260,7 +270,12 @@
" 'title': 'literacy rates Africa',\n",
" 'description': 'What are literacy rates in African countries?',\n",
" 'narrative': 'A relevant document will contain information about the\\nliteracy rate in an African country.\\nGeneral education levels that do not specifically include literacy rates\\nare not relevant.'},\n",
" 'entities': [{'begin': 0,\n",
" 'entities': [{'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa',\n",
" 'score': 0.904072542157174},\n",
" {'begin': 0,\n",
" 'end': 8,\n",
" 'mention': 'literacy',\n",
" 'url': 'https://en.wikipedia.org/wiki/Literacy',\n",
Expand All @@ -270,14 +285,99 @@
" 'mention': 'literacy rates',\n",
" 'url': 'https://en.wikipedia.org/wiki/List_of_countries_by_literacy_rate',\n",
" 'score': 0.09090909090909001},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Roman_province)',\n",
" 'score': 0.016743557111040003},\n",
" {'begin': 9,\n",
" 'end': 14,\n",
" 'mention': 'rates',\n",
" 'url': 'https://en.wikipedia.org/wiki/Rates_(Póvoa_de_Varzim)',\n",
" 'score': 0.012711864406779001}]}"
" 'score': 0.012711864406779001},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Toto_song)',\n",
" 'score': 0.00620426344257},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(TV_series)',\n",
" 'score': 0.001073814826598},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Petrarch)',\n",
" 'score': 0.0008749602290804964},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(journal)',\n",
" 'score': 0.0008749602290804964},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Karl_Wolf_song)',\n",
" 'score': 0.0007556474705695197},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Pharoah_Sanders_album)',\n",
" 'score': 0.0003181673560292714},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Perpetuum_Jazzile_album)',\n",
" 'score': 0.00019885459751829463},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Rose_Laurens_song)',\n",
" 'score': 0.00019885459751829463},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_in_Africa',\n",
" 'score': 0.00015908367801463572},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa,_Ohio',\n",
" 'score': 0.0001193127585109767},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa,_Indiana',\n",
" 'score': 0.0001193127585109767},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(Miriam_Makeba_album)',\n",
" 'score': 0.0001193127585109767},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(William_Billings)',\n",
" 'score': 0.0001193127585109767},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Africa_(film)',\n",
" 'score': 0.0001193127585109767},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Ifriqiya',\n",
" 'score': 7.954183900731785e-05},\n",
" {'begin': 15,\n",
" 'end': 21,\n",
" 'mention': 'africa',\n",
" 'url': 'https://en.wikipedia.org/wiki/Afrika_(video_game)',\n",
" 'score': 7.954183900731785e-05}]}"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -288,7 +388,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -363,7 +463,7 @@
" 'score': 0.0003222687721559781}]}"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -381,39 +481,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download from the Incubator: https://files.webis.de/data-in-production/data-research/tira-zenodo-dump-preparation/query-processors-in-progress/marcel-gohsen-query-interpretation-trec-core.zip\n",
"\tThis is only used for last spot checks before archival to Zenodo.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Download: 100%|██████████| 98.5k/98.5k [00:00<00:00, 1.92MiB/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download finished. Extract...\n",
"Extraction finished: /root/.tira/extracted_runs/ir-benchmarks/disks45-nocr-trec-robust-2004-20230209-training/marcel-gohsen\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
Expand Down Expand Up @@ -567,7 +637,7 @@
"[250 rows x 4 columns]"
]
},
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -579,7 +649,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -615,7 +685,7 @@
" 'score': 0.6129202512020191}]}"
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -626,7 +696,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -646,7 +716,7 @@
" 'score': 0.106382978723404}]}"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 6911268

Please sign in to comment.