From 6911268173f6aebfd847c19b67c344e89cab5b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maik=20Fr=C3=B6be?= Date: Wed, 20 Mar 2024 21:28:09 +0000 Subject: [PATCH] rename approach-ids --- .../tutorial-entity-linking-in-progress.ipynb | 192 ++++++++++++------ 1 file changed, 131 insertions(+), 61 deletions(-) diff --git a/tutorials/tutorial-entity-linking-in-progress.ipynb b/tutorials/tutorial-entity-linking-in-progress.ipynb index 7ae90f8..fc92d87 100644 --- a/tutorials/tutorial-entity-linking-in-progress.ipynb +++ b/tutorials/tutorial-entity-linking-in-progress.ipynb @@ -6,9 +6,9 @@ "id": "w8g9eAcFXPPh" }, "source": [ - "# A work in progress notebook for entity linking\n", + "# IR Lab Tutorial (Research Oriented): Entity Linking For Query Interpretation\n", "\n", - "(Submission is currently in progress, looks like we have to lowercase all queries before linking the entities, currently discussing this with Marcel)" + "This tutorial shows how to re-use / load entity linkings and derived query interpretations pre-computed in [TIREx](https://www.tira.io/tirex). Please have a look at the [corresponding paper](https://webis.de/publications.html?q=entity#kasturia_2022) for details." ] }, { @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -37,7 +37,17 @@ "id": "kP6nwrlJSSUw", "outputId": "14b6a45d-5b30-4a74-eb1a-ef60a47294e6" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", + "\n", + "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n" + ] + } + ], "source": [ "import pyterrier as pt\n", "\n", @@ -50,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -67,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -110,35 +120,35 @@ " 301\n", " International Organized Crime\n", " {'query_id': '301', 'title': 'International Or...\n", - " []\n", + " [{'begin': 14, 'end': 29, 'mention': 'organize...\n", " \n", " \n", " 1\n", " 302\n", " Poliomyelitis and Post-Polio\n", " {'query_id': '302', 'title': 'Poliomyelitis an...\n", - " []\n", + " [{'begin': 0, 'end': 5, 'mention': 'polio', 'u...\n", " \n", " \n", " 2\n", " 303\n", " Hubble Telescope Achievements\n", " {'query_id': '303', 'title': 'Hubble Telescope...\n", - " []\n", + " [{'begin': 7, 'end': 16, 'mention': 'telescope...\n", " \n", " \n", " 3\n", " 304\n", " Endangered Species (Mammals)\n", " {'query_id': '304', 'title': 'Endangered Speci...\n", - " []\n", + " [{'begin': 11, 'end': 18, 'mention': 'species'...\n", " \n", " \n", " 4\n", " 305\n", " Most Dangerous Vehicles\n", " {'query_id': '305', 'title': 'Most Dangerous V...\n", - " []\n", + " [{'begin': 0, 'end': 4, 'mention': 'most', 'ur...\n", " \n", " \n", " ...\n", @@ -166,7 +176,7 @@ " 698\n", " literacy rates Africa\n", " {'query_id': '698', 'title': 'literacy rates A...\n", - " [{'begin': 0, 'end': 8, 'mention': 'literacy',...\n", + " [{'begin': 15, 'end': 21, 'mention': 'africa',...\n", " \n", " \n", " 248\n", @@ -215,22 +225,22 @@ "249 {'query_id': '700', 'title': 'gasoline tax U.S... \n", "\n", " entities \n", - "0 [] \n", - "1 [] \n", - "2 [] \n", - "3 [] \n", - "4 [] \n", + "0 [{'begin': 14, 'end': 29, 'mention': 'organize... \n", + "1 [{'begin': 0, 'end': 5, 'mention': 'polio', 'u... \n", + "2 [{'begin': 7, 'end': 16, 'mention': 'telescope... \n", + "3 [{'begin': 11, 'end': 18, 'mention': 'species'... \n", + "4 [{'begin': 0, 'end': 4, 'mention': 'most', 'ur... \n", ".. ... \n", "245 [{'begin': 7, 'end': 22, 'mention': 'plastic s... \n", "246 [{'begin': 0, 'end': 22, 'mention': 'air traff... \n", - "247 [{'begin': 0, 'end': 8, 'mention': 'literacy',... \n", + "247 [{'begin': 15, 'end': 21, 'mention': 'africa',... \n", "248 [{'begin': 0, 'end': 4, 'mention': 'term', 'ur... \n", "249 [{'begin': 0, 'end': 8, 'mention': 'gasoline',... \n", "\n", "[250 rows x 4 columns]" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -242,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -260,7 +270,12 @@ " 'title': 'literacy rates Africa',\n", " 'description': 'What are literacy rates in African countries?',\n", " 'narrative': 'A relevant document will contain information about the\\nliteracy rate in an African country.\\nGeneral education levels that do not specifically include literacy rates\\nare not relevant.'},\n", - " 'entities': [{'begin': 0,\n", + " 'entities': [{'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa',\n", + " 'score': 0.904072542157174},\n", + " {'begin': 0,\n", " 'end': 8,\n", " 'mention': 'literacy',\n", " 'url': 'https://en.wikipedia.org/wiki/Literacy',\n", @@ -270,14 +285,99 @@ " 'mention': 'literacy rates',\n", " 'url': 'https://en.wikipedia.org/wiki/List_of_countries_by_literacy_rate',\n", " 'score': 0.09090909090909001},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Roman_province)',\n", + " 'score': 0.016743557111040003},\n", " {'begin': 9,\n", " 'end': 14,\n", " 'mention': 'rates',\n", " 'url': 'https://en.wikipedia.org/wiki/Rates_(Póvoa_de_Varzim)',\n", - " 'score': 0.012711864406779001}]}" + " 'score': 0.012711864406779001},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Toto_song)',\n", + " 'score': 0.00620426344257},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(TV_series)',\n", + " 'score': 0.001073814826598},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Petrarch)',\n", + " 'score': 0.0008749602290804964},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(journal)',\n", + " 'score': 0.0008749602290804964},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Karl_Wolf_song)',\n", + " 'score': 0.0007556474705695197},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Pharoah_Sanders_album)',\n", + " 'score': 0.0003181673560292714},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Perpetuum_Jazzile_album)',\n", + " 'score': 0.00019885459751829463},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Rose_Laurens_song)',\n", + " 'score': 0.00019885459751829463},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_in_Africa',\n", + " 'score': 0.00015908367801463572},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa,_Ohio',\n", + " 'score': 0.0001193127585109767},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa,_Indiana',\n", + " 'score': 0.0001193127585109767},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(Miriam_Makeba_album)',\n", + " 'score': 0.0001193127585109767},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(William_Billings)',\n", + " 'score': 0.0001193127585109767},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Africa_(film)',\n", + " 'score': 0.0001193127585109767},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Ifriqiya',\n", + " 'score': 7.954183900731785e-05},\n", + " {'begin': 15,\n", + " 'end': 21,\n", + " 'mention': 'africa',\n", + " 'url': 'https://en.wikipedia.org/wiki/Afrika_(video_game)',\n", + " 'score': 7.954183900731785e-05}]}" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -288,7 +388,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -363,7 +463,7 @@ " 'score': 0.0003222687721559781}]}" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -381,39 +481,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download from the Incubator: https://files.webis.de/data-in-production/data-research/tira-zenodo-dump-preparation/query-processors-in-progress/marcel-gohsen-query-interpretation-trec-core.zip\n", - "\tThis is only used for last spot checks before archival to Zenodo.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Download: 100%|██████████| 98.5k/98.5k [00:00<00:00, 1.92MiB/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download finished. Extract...\n", - "Extraction finished: /root/.tira/extracted_runs/ir-benchmarks/disks45-nocr-trec-robust-2004-20230209-training/marcel-gohsen\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -567,7 +637,7 @@ "[250 rows x 4 columns]" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -579,7 +649,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -615,7 +685,7 @@ " 'score': 0.6129202512020191}]}" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -626,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -646,7 +716,7 @@ " 'score': 0.106382978723404}]}" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" }