diff --git a/tutorials/tutorial-entity-linking.ipynb b/tutorials/tutorial-entity-linking.ipynb deleted file mode 100644 index e615810..0000000 --- a/tutorials/tutorial-entity-linking.ipynb +++ /dev/null @@ -1,2273 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# A work in progress notebook for entity linking\n", - "\n", - "(Submission is currently in progress, looks like we have to lowercase all queries before linking the entities, currently discussing this with Marcel)" - ], - "metadata": { - "id": "w8g9eAcFXPPh" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GCPbVYynSBnZ" - }, - "outputs": [], - "source": [ - "# Only needed in Colab, in codespaces everything is already installed.\n", - "!pip3 install python-terrier tira ir-datasets" - ] - }, - { - "cell_type": "code", - "source": [ - "import pyterrier as pt\n", - "from tqdm import tqdm\n", - "import pandas as pd\n", - "\n", - "if not pt.started():\n", - " pt.init()\n", - "\n", - "from tira.rest_api_client import Client\n", - "tira = Client()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kP6nwrlJSSUw", - "outputId": "cfc5b500-1154-44c2-d336-950cc8fe2090" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "terrier-assemblies 5.8 jar-with-dependencies not found, downloading to /root/.pyterrier...\n", - "Done\n", - "terrier-python-helper 0.0.8 jar not found, downloading to /root/.pyterrier...\n", - "Done\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Example Evaluation on a single dataset" - ], - "metadata": { - "id": "06YM0s0Qrh0H" - } - }, - { - "cell_type": "code", - "source": [ - "dataset = pt.get_dataset(\"irds:disks45/nocr/trec-robust-2004\")\n", - "topics = dataset.get_topics(variant='title')\n", - "topics.head(3)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 143 - }, - "id": "n8keQrBMVUR_", - "outputId": "ca5ebf50-34cc-4c3f-8e76-ef7fc850ad81" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " qid query\n", - "0 301 international organized crime\n", - "1 302 poliomyelitis and post polio\n", - "2 303 hubble telescope achievements" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qidquery
0301international organized crime
1302poliomyelitis and post polio
2303hubble telescope achievements
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "topics", - "summary": "{\n \"name\": \"topics\",\n \"rows\": 250,\n \"fields\": [\n {\n \"column\": \"qid\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 250,\n \"samples\": [\n \"443\",\n \"307\",\n \"398\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 249,\n \"samples\": [\n \"inventions scientific discoveries\",\n \"new hydroelectric projects\",\n \"dismantling europe s arsenal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "source": [ - "query_entity_linking = tira.pt.transform_queries('ir-benchmarks/marcel-gohsen/wood-block', dataset)\n", - "query_entity_linking(topics).head(3)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 143 - }, - "id": "tqOFa0PJVcvW", - "outputId": "6d041b6c-9b86-4b3a-a8e7-57cee32a4495" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " qid query \\\n", - "0 301 International Organized Crime \n", - "1 302 Poliomyelitis and Post-Polio \n", - "2 303 Hubble Telescope Achievements \n", - "\n", - " original_query \\\n", - "0 {'query_id': '301', 'title': 'International Or... \n", - "1 {'query_id': '302', 'title': 'Poliomyelitis an... \n", - "2 {'query_id': '303', 'title': 'Hubble Telescope... \n", - "\n", - " entities \n", - "0 [{'begin': 14, 'end': 29, 'mention': 'organize... \n", - "1 [{'begin': 0, 'end': 5, 'mention': 'polio', 'u... \n", - "2 [{'begin': 7, 'end': 16, 'mention': 'telescope... " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qidqueryoriginal_queryentities
0301International Organized Crime{'query_id': '301', 'title': 'International Or...[{'begin': 14, 'end': 29, 'mention': 'organize...
1302Poliomyelitis and Post-Polio{'query_id': '302', 'title': 'Poliomyelitis an...[{'begin': 0, 'end': 5, 'mention': 'polio', 'u...
2303Hubble Telescope Achievements{'query_id': '303', 'title': 'Hubble Telescope...[{'begin': 7, 'end': 16, 'mention': 'telescope...
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"query_entity_linking(topics)\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"qid\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"301\",\n \"302\",\n \"303\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"International Organized Crime\",\n \"Poliomyelitis and Post-Polio\",\n \"Hubble Telescope Achievements\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"original_query\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"entities\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "code", - "source": [ - "query_entity_linking(topics).iloc[0].to_dict()" - ], - "metadata": { - "id": "fBVvxCBJ9Cho", - "outputId": "863032a5-f47c-4ca6-ee31-1c4a423df18a", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'qid': '301',\n", - " 'query': 'International Organized Crime',\n", - " 'original_query': {'query_id': '301',\n", - " 'title': 'International Organized Crime',\n", - " 'description': 'Identify organizations that participate in international criminal\\nactivity, the activity, and, if possible, collaborating organizations\\nand the countries involved.',\n", - " 'narrative': 'A relevant document must as a minimum identify the organization and the\\ntype of illegal activity (e.g., Columbian cartel exporting cocaine).\\nVague references to international drug trade without identification of\\nthe organization(s) involved would not be relevant.'},\n", - " 'entities': [{'begin': 14,\n", - " 'end': 29,\n", - " 'mention': 'organized crime',\n", - " 'url': 'https://en.wikipedia.org/wiki/Organized_crime',\n", - " 'score': 0.97188995215311},\n", - " {'begin': 0,\n", - " 'end': 29,\n", - " 'mention': 'international organized crime',\n", - " 'url': 'https://en.wikipedia.org/wiki/Transnational_organized_crime',\n", - " 'score': 0.33333333333333304},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/WTA_International_tournaments',\n", - " 'score': 0.049538610976202005},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International',\n", - " 'score': 0.03399708596406},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(Amtrak_train)',\n", - " 'score': 0.010199125789218002},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Navistar_International',\n", - " 'score': 0.008742107819329001},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_Harvester',\n", - " 'score': 0.007770762506070001},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(GN_train)',\n", - " 'score': 0.004856726566294},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(The_Three_Degrees_album)',\n", - " 'score': 0.0038853812530350003},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(Chase_&_Status_song)',\n", - " 'score': 0.003399708596406},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Political_international',\n", - " 'score': 0.003399708596406},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_sport',\n", - " 'score': 0.0029140359397760002},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(New_Order_album)',\n", - " 'score': 0.0029140359397760002},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': \"https://en.wikipedia.org/wiki/International_Workingmen's_Association\",\n", - " 'score': 0.002428363283147},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(Kevin_Michael_album)',\n", - " 'score': 0.0019426906265170001},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Molde_FK',\n", - " 'score': 0.0009713453132588635},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Communist_International',\n", - " 'score': 0.0009713453132588635},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Cap_(sport)',\n", - " 'score': 0.0009713453132588635},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Second_International',\n", - " 'score': 0.00048567265662943174},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_music',\n", - " 'score': 0.00048567265662943174},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_Paint',\n", - " 'score': 0.00048567265662943174},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/International_(steamship)',\n", - " 'score': 0.00048567265662943174},\n", - " {'begin': 0,\n", - " 'end': 13,\n", - " 'mention': 'international',\n", - " 'url': 'https://en.wikipedia.org/wiki/Pirate_Parties_International',\n", - " 'score': 0.00048567265662943174}]}" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "source": [ - "query_entity_linking(topics).iloc[247].to_dict()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "08x9c7tjWTAe", - "outputId": "24400239-1ae7-4682-ae86-cf2d01b2c14e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'qid': '698',\n", - " 'query': 'literacy rates Africa',\n", - " 'original_query': {'query_id': '698',\n", - " 'title': 'literacy rates Africa',\n", - " 'description': 'What are literacy rates in African countries?',\n", - " 'narrative': 'A relevant document will contain information about the\\nliteracy rate in an African country.\\nGeneral education levels that do not specifically include literacy rates\\nare not relevant.'},\n", - " 'entities': [{'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa',\n", - " 'score': 0.904072542157174},\n", - " {'begin': 0,\n", - " 'end': 8,\n", - " 'mention': 'literacy',\n", - " 'url': 'https://en.wikipedia.org/wiki/Literacy',\n", - " 'score': 0.710061993323795},\n", - " {'begin': 0,\n", - " 'end': 14,\n", - " 'mention': 'literacy rates',\n", - " 'url': 'https://en.wikipedia.org/wiki/List_of_countries_by_literacy_rate',\n", - " 'score': 0.09090909090909001},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Roman_province)',\n", - " 'score': 0.016743557111040003},\n", - " {'begin': 9,\n", - " 'end': 14,\n", - " 'mention': 'rates',\n", - " 'url': 'https://en.wikipedia.org/wiki/Rates_(Póvoa_de_Varzim)',\n", - " 'score': 0.012711864406779001},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Toto_song)',\n", - " 'score': 0.00620426344257},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(TV_series)',\n", - " 'score': 0.001073814826598},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Petrarch)',\n", - " 'score': 0.0008749602290804964},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(journal)',\n", - " 'score': 0.0008749602290804964},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Karl_Wolf_song)',\n", - " 'score': 0.0007556474705695197},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Pharoah_Sanders_album)',\n", - " 'score': 0.0003181673560292714},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Perpetuum_Jazzile_album)',\n", - " 'score': 0.00019885459751829463},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Rose_Laurens_song)',\n", - " 'score': 0.00019885459751829463},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_in_Africa',\n", - " 'score': 0.00015908367801463572},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa,_Ohio',\n", - " 'score': 0.0001193127585109767},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa,_Indiana',\n", - " 'score': 0.0001193127585109767},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(Miriam_Makeba_album)',\n", - " 'score': 0.0001193127585109767},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(William_Billings)',\n", - " 'score': 0.0001193127585109767},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Africa_(film)',\n", - " 'score': 0.0001193127585109767},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Ifriqiya',\n", - " 'score': 7.954183900731785e-05},\n", - " {'begin': 15,\n", - " 'end': 21,\n", - " 'mention': 'africa',\n", - " 'url': 'https://en.wikipedia.org/wiki/Afrika_(video_game)',\n", - " 'score': 7.954183900731785e-05}],\n", - " 'entity_count': 21}" - ] - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Example Evaluation on All Datasets" - ], - "metadata": { - "id": "EEgY_6fFru2P" - } - }, - { - "cell_type": "code", - "source": [ - "ir_datasets = ['clueweb09/en/trec-web-2009', 'clueweb09/en/trec-web-2010', 'clueweb09/en/trec-web-2011',\n", - " 'clueweb09/en/trec-web-2012', 'clueweb12/trec-web-2013', 'clueweb12/trec-web-2014',\n", - " 'disks45/nocr/trec-robust-2004', 'antique/test', 'argsme/2020-04-01/touche-2020-task-1',\n", - " 'argsme/2020-04-01/touche-2021-task-1', 'clueweb12/touche-2020-task-2', 'clueweb12/touche-2021-task-2',\n", - " 'cord19/fulltext/trec-covid', 'cranfield', 'disks45/nocr/trec7', 'disks45/nocr/trec8',\n", - " 'gov/trec-web-2002', 'gov/trec-web-2003', 'gov/trec-web-2004', 'gov2/trec-tb-2004',\n", - " 'gov2/trec-tb-2005', 'gov2/trec-tb-2006', 'medline/2004/trec-genomics-2004',\n", - " 'medline/2004/trec-genomics-2005', 'medline/2017/trec-pm-2017', 'medline/2017/trec-pm-2018',\n", - " 'msmarco-passage/trec-dl-2019/judged', 'msmarco-passage/trec-dl-2020/judged',\n", - " 'nfcorpus/test', 'vaswani', 'wapo/v2/trec-core-2018']\n", - "df_all = []\n", - "\n", - "for dataset in tqdm(ir_datasets):\n", - " pt_dataset = pt.get_dataset(f\"irds:{dataset}\")\n", - " topics = pt_dataset.get_topics()\n", - " query_entity_linking = tira.pt.transform_queries('ir-benchmarks/marcel-gohsen/wood-block', dataset)\n", - " df = query_entity_linking(topics)\n", - " df['dataset'] = dataset\n", - "\n", - " df_all += [df]\n", - "\n", - "df_all = pd.concat(df_all)\n", - "df_all" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 211 - }, - "id": "KubJXyAsV8cX", - "outputId": "66aac083-8210-4cb2-8d22-7382092adf9b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "error", - "ename": "NameError", - "evalue": "name 'tqdm' is not defined", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mdf_all\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mdataset\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mir_datasets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0mpt_dataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"irds:{dataset}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mtopics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpt_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_topics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'tqdm' is not defined" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "df_all['entity_count'] = df_all['entities'].apply(lambda i: len(i))\n", - "df_all.sort_values('entity_count')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "KIE94elCuMCu", - "outputId": "0e173b01-246e-43ea-b170-6b2d3b013092" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " qid query \\\n", - "2 53 Should blood donations be financially compensa... \n", - "39 40 michworks \n", - "96 2180086 what are mormens? \n", - "44 145 vines for shade \n", - "46 47 indexed annuity \n", - ".. ... ... \n", - "61 103 how far around a cylinder and under what condi... \n", - "153 2452795 How can I heat the water within a 150 gal tank... \n", - "136 206 have any analytical studies been conducted on ... \n", - "91 140 given complete freedom in the design of an air... \n", - "28 54 what is the effect of cross sectional shape on... \n", - "\n", - " description type \\\n", - "2 NaN NaN \n", - "39 Find information on getting a job in Michigan.... faceted \n", - "96 NaN NaN \n", - "44 \\n information on vines that can be grown i... ambiguous \n", - "46 I'm looking for information about indexed annu... faceted \n", - ".. ... ... \n", - "61 NaN NaN \n", - "153 NaN NaN \n", - "136 NaN NaN \n", - "91 NaN NaN \n", - "28 NaN NaN \n", - "\n", - " subtopics \\\n", - "2 NaN \n", - "39 ((1, \\n Take me to the michworks Michigan T... \n", - "96 NaN \n", - "44 ((1, \\n information on vines that can be gr... \n", - "46 ((1, \\n What is an indexed annuity? What a... \n", - ".. ... \n", - "61 NaN \n", - "153 NaN \n", - "136 NaN \n", - "91 NaN \n", - "28 NaN \n", - "\n", - " original_query \\\n", - "2 {'query_id': '53', 'title': 'Should blood dona... \n", - "39 {'query_id': '40', 'query': 'michworks', 'desc... \n", - "96 {'query_id': '2180086', 'text': 'what are morm... \n", - "44 {'query_id': '145', 'query': 'vines for shade'... \n", - "46 {'query_id': '47', 'query': 'indexed annuity',... \n", - ".. ... \n", - "61 {'query_id': '103', 'text': 'how far around a ... \n", - "153 {'query_id': '2452795', 'text': 'How can I hea... \n", - "136 {'query_id': '206', 'text': 'have any analytic... \n", - "91 {'query_id': '140', 'text': 'given complete fr... \n", - "28 {'query_id': '54', 'text': 'what is the effect... \n", - "\n", - " entities \\\n", - "2 [] \n", - "39 [] \n", - "96 [] \n", - "44 [] \n", - "46 [] \n", - ".. ... \n", - "61 [{'begin': 123, 'end': 129, 'mention': 'linear... \n", - "153 [{'begin': 19, 'end': 24, 'mention': 'water', ... \n", - "136 [{'begin': 33, 'end': 42, 'mention': 'conducte... \n", - "91 [{'begin': 43, 'end': 51, 'mention': 'airplane... \n", - "28 [{'begin': 47, 'end': 55, 'mention': 'the flow... \n", - "\n", - " dataset title narrative need context \\\n", - "2 argsme/2020-04-01/touche-2021-task-1 NaN NaN NaN NaN \n", - "39 clueweb09/en/trec-web-2009 NaN NaN NaN NaN \n", - "96 antique/test NaN NaN NaN NaN \n", - "44 clueweb09/en/trec-web-2011 NaN NaN NaN NaN \n", - "46 clueweb09/en/trec-web-2009 NaN NaN NaN NaN \n", - ".. ... ... ... ... ... \n", - "61 cranfield NaN NaN NaN NaN \n", - "153 antique/test NaN NaN NaN NaN \n", - "136 cranfield NaN NaN NaN NaN \n", - "91 cranfield NaN NaN NaN NaN \n", - "28 cranfield NaN NaN NaN NaN \n", - "\n", - " disease gene demographic other all entity_count \n", - "2 NaN NaN NaN NaN NaN 0 \n", - "39 NaN NaN NaN NaN NaN 0 \n", - "96 NaN NaN NaN NaN NaN 0 \n", - "44 NaN NaN NaN NaN NaN 0 \n", - "46 NaN NaN NaN NaN NaN 0 \n", - ".. ... ... ... ... ... ... \n", - "61 NaN NaN NaN NaN NaN 216 \n", - "153 NaN NaN NaN NaN NaN 221 \n", - "136 NaN NaN NaN NaN NaN 222 \n", - "91 NaN NaN NaN NaN NaN 250 \n", - "28 NaN NaN NaN NaN NaN 259 \n", - "\n", - "[2544 rows x 18 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qidquerydescriptiontypesubtopicsoriginal_queryentitiesdatasettitlenarrativeneedcontextdiseasegenedemographicotherallentity_count
253Should blood donations be financially compensa...NaNNaNNaN{'query_id': '53', 'title': 'Should blood dona...[]argsme/2020-04-01/touche-2021-task-1NaNNaNNaNNaNNaNNaNNaNNaNNaN0
3940michworksFind information on getting a job in Michigan....faceted((1, \\n Take me to the michworks Michigan T...{'query_id': '40', 'query': 'michworks', 'desc...[]clueweb09/en/trec-web-2009NaNNaNNaNNaNNaNNaNNaNNaNNaN0
962180086what are mormens?NaNNaNNaN{'query_id': '2180086', 'text': 'what are morm...[]antique/testNaNNaNNaNNaNNaNNaNNaNNaNNaN0
44145vines for shade\\n information on vines that can be grown i...ambiguous((1, \\n information on vines that can be gr...{'query_id': '145', 'query': 'vines for shade'...[]clueweb09/en/trec-web-2011NaNNaNNaNNaNNaNNaNNaNNaNNaN0
4647indexed annuityI'm looking for information about indexed annu...faceted((1, \\n What is an indexed annuity? What a...{'query_id': '47', 'query': 'indexed annuity',...[]clueweb09/en/trec-web-2009NaNNaNNaNNaNNaNNaNNaNNaNNaN0
.........................................................
61103how far around a cylinder and under what condi...NaNNaNNaN{'query_id': '103', 'text': 'how far around a ...[{'begin': 123, 'end': 129, 'mention': 'linear...cranfieldNaNNaNNaNNaNNaNNaNNaNNaNNaN216
1532452795How can I heat the water within a 150 gal tank...NaNNaNNaN{'query_id': '2452795', 'text': 'How can I hea...[{'begin': 19, 'end': 24, 'mention': 'water', ...antique/testNaNNaNNaNNaNNaNNaNNaNNaNNaN221
136206have any analytical studies been conducted on ...NaNNaNNaN{'query_id': '206', 'text': 'have any analytic...[{'begin': 33, 'end': 42, 'mention': 'conducte...cranfieldNaNNaNNaNNaNNaNNaNNaNNaNNaN222
91140given complete freedom in the design of an air...NaNNaNNaN{'query_id': '140', 'text': 'given complete fr...[{'begin': 43, 'end': 51, 'mention': 'airplane...cranfieldNaNNaNNaNNaNNaNNaNNaNNaNNaN250
2854what is the effect of cross sectional shape on...NaNNaNNaN{'query_id': '54', 'text': 'what is the effect...[{'begin': 47, 'end': 55, 'mention': 'the flow...cranfieldNaNNaNNaNNaNNaNNaNNaNNaNNaN259
\n", - "

2544 rows × 18 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"df_all\",\n \"rows\": 2544,\n \"fields\": [\n {\n \"column\": \"qid\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1372,\n \"samples\": [\n \"126\",\n \"262\",\n \"423\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2412,\n \"samples\": [\n \"Proteins involved in the nerve growth factor pathway\",\n \"is dragon fruit good for you ?\",\n \"Why do some people only go to church on Easter Sunday and never go again until Christmas ?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"description\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1045,\n \"samples\": [\n \"What are the pros and cons of adults using human growth hormone (HGH)?\",\n \"what is known about an mRNA vaccine for the SARS-CoV-2 virus?\",\n \"how has COVID-19 affected Canada\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"faceted\",\n \"ambiguous\",\n \"single\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"subtopics\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 252,\n \"samples\": [\n [\n [\n \"1\",\n \"\\n Find reviews of PlayStation 2 games.\\n \",\n \"inf\"\n ],\n [\n \"2\",\n \"\\n Where can I find cheat codes for PlayStation 2 games?\\n \",\n \"inf\"\n ],\n [\n \"3\",\n \"\\n I'm looking for sites that announce new PlayStation 2 games.\\n \",\n \"inf\"\n ],\n [\n \"4\",\n \"\\n Where can I buy used PlayStation 2 games?\\n \",\n \"inf\"\n ],\n [\n \"5\",\n \"\\n What are the specifications of the PlayStation 2 console?\\n \",\n \"inf\"\n ]\n ],\n [\n [\n \"1\",\n \"\\n How are VLDL levels determined?\\n \",\n \"inf\"\n ],\n [\n \"2\",\n \"\\n What are good and bad levels of VLDL in a cholesterol test?\\n \",\n \"inf\"\n ],\n [\n \"3\",\n \"\\n What is the difference between LDL, HDL, and VLDL?\\n \",\n \"inf\"\n ]\n ],\n [\n [\n \"1\",\n \"\\n Find a picture of a blue-throated hummingbird.\\n \",\n \"nav\"\n ],\n [\n \"2\",\n \"\\n What is the scientific name of the blue-throated hummingbird?\\n \",\n \"nav\"\n ],\n [\n \"3\",\n \"\\n What are the migration patterns of the blue-throated hummingbird?\\n \",\n \"inf\"\n ],\n [\n \"4\",\n \"\\n The blue-throated hummingbird is native to what areas?\\n \",\n \"inf\"\n ]\n ]\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"original_query\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"entities\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 31,\n \"samples\": [\n \"vaswani\",\n \"clueweb12/trec-web-2013\",\n \"cord19/fulltext/trec-covid\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1096,\n \"samples\": [\n \"Geysers\",\n \"walnut oil\",\n \"splenda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"narrative\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 699,\n \"samples\": [\n \"Any ship loss due to weather is relevant, either in international\\nor coastal waters.\",\n \"Is quantum mechanics the same as quantum physics? If no, what are the differences between the two disciplines? Highly relevant documents should contain enough information to settle these two questions. In what way is quantum physics similar to quantum mechanics and how they are different. At the end, a user should ideally be able to explicitly differentiate one from the other. Relevant documents will be helpful to better understand qualities of one of the disciplines. Any document that is part of a curriculum and does not provide comparisons is not considered relevant.\",\n \"Documents that merely mention the name of a company\\nor group that produces encryption equipment but does\\nnot mention the exportation and/or commercial exploitation\\nof the encryption equipment are not relevant. Documents \\nwhich refer to governmental access into the encryption \\nsystems for the purposes of counter-intelligence or \\nanti-crime activities are relevant.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"need\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 50,\n \"samples\": [\n \"Which Saccharomyces cerevisiae proteins are involved in the ubiquitin proteolytic pathway?\",\n \"Studies that investigate similarities in morphological changes among apoptosis and autophagy processes.\",\n \"Find reports that describe xenograft models of human cancers.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"context\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 50,\n \"samples\": [\n \"The researcher identified a protein in another yeast species and wants to compare it to the same one in Saccharomyces cerevisiae.\",\n \"Collection of information regarding the potential relationship between apoptosis and autophagy.\",\n \"A xenograft animal model of cancer is one in which foreign tumor tissue is grafted into animals, usually rodents, providing a means to test various compounds for their ability to slow or halt tumor growth.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"disease\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 39,\n \"samples\": [\n \"Gastric cancer\",\n \"Prostate cancer\",\n \"neuroblastoma\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gene\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 71,\n \"samples\": [\n \"ERBB3\",\n \"ABL1\",\n \"CDK4 Amplification\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"demographic\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 55,\n \"samples\": [\n \"39-year-old female\",\n \"46-year-old female\",\n \"62-year-old female\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"other\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 18,\n \"samples\": [\n \"None\",\n \"Hypertension, Hypercholesterolemia\",\n \"Lupus\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"all\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 325,\n \"samples\": [\n \"amnesia - - nausea , memory , mackerel , infants , neurotoxins , pregnancy , tuna , seafood , sardines , halibut , foodborne illness , children , brain health , brain disease , anchovies - -\",\n \"brca genes - - mortality , phytonutrients , plant protein , metastases , lifespan , ldl cholesterol , legumes , lentils , protein , soy , vegetable protein , weight loss , women 's health , triglycerides , tofu - -\",\n \"are multivitamins good for you ? so some vitamins may make you live longer , some may make you live a shorter life . what if you put them all together . do people who take multivitamin supplements live longer or shorter lives than those who don \\u2019 t ? all just seems to cancel out and give you expensive pee . please feel free to post any ask-the-doctor type questions here in the comments section and i \\u2019 d be happy to try to answer them . and check out the other \\u201c hhh \\u201d videos ( harmful , harmless , or helpful ? ) . also , there are over a thousand subjects covered in the rest of my videos \\u2013 please feel free to explore them as well ! as far as weight loss goes , l-arginine and white bean extract were mentioned on the dr. oz show recently and i \\u2019 m wondering what your opinion is on these supplements ? as a dietitian , i \\u2019 m hesitant to recommend any type of supplement because we are always taught \\u201c food first \\u201d but if something is safe and effective in blocking the absorption of ( some ) carbs or increasing metabolism , it could potentially be another tool in our arsenal to help people who are trying to lose weight . it would take some significant data to convince me to actually recommend supplements for weight loss , but just curious what your opinion is . thanks ! i share your concern about recommending supplements especially as it relates to weight loss . i recommend the only diet shown to work over time \\u2026 the \\u201c ad libitum \\u201d low fat plant based diet with b12 supplementation . i particularly like and often cite the study , shintani et al . , the hawaii diet : ad libitum high carbohydrate , low fat diet for reduction of chronic disease risk factors : obesity , hypertension , hypercholesterolemia , and hyperglycemia , hawaii med j 60 : 69-73 ; mar 2001 . i think the key concept for patients to understand is \\u201c calorie density \\u201d and not \\u201c calories \\u201d . i recommend and keep loaner copies on hand of jeff novick \\u2019 s dvd , \\u201c calorie density : how to eat more , weigh less and live longer \\u201d . he discusses the important concepts of satiety and calorie density and ties the latter to the amount of exercise a patient does . he shows how folks can lose \\u201c weight \\u201d without exercising . i also recommend john mcdougall \\u2019 s newsletter article ( 12 / 08 ) , the fat vegan , so folks who adopt a plant based diet can avoid some of the behaviors that will thwart their efforts . neal barnard \\u2019 s \\u201c breaking the food seduction \\u201d is also helpful for many of my patients to help understand that the issue is \\u201c addiction \\u201d not weak will \\u2026 it is available as a book or dvd . resources beyond that depend on the patients individual circumstances . i \\u2019 ve been practicing primary care medicine for over 30 years and the science is clear on the best approach . i \\u2019 ve seen many diets come and go and many \\u201c supplement \\u201d recommendations come and go . i don \\u2019 t recommend any of them . calorie restricted diets don \\u2019 t work in the long run except for a very small % of patients . diets like the atkins diet have been shown not to be healthy . we can avoid chemicals see http : / / nutritionfacts.org / videos / obesity-causing-pollutants-in-food / and start adopting the best overall diet see .. http : / / nutritionfacts.org / videos / thousands-of-vegans-studied / . i am a fan of understanding the science and the complexity to help us understand what works for patients see \\u2026 http : / / nutritionfacts.org / videos / how-to-upregulate-metabolism / but we have to be able to give our patients straight forward practical information and avoid jumping on the latest bandwagon whether that is a supplement or the newest fad diet . for example i believe the best practical starter handout is pcrm \\u2019 s vegetarian starter kit available as free download on their website . keep tuned to nutritionfacts.org as the science keeps changing .. where can i find a pcp like you ? out of curiosity , do you have any \\u201c primal \\u201d or \\u201c paleo \\u201d folks coming to your practice these days ? it seems like this another diet trend / fad that is taking hold of the public and medical profession as well ( our local news show even -uncritically- featured this type of diet for a week in its medical / health segment ) . dr. greger , i \\u2019 ve got 3 questions on vitamin supplements that i couldn \\u2019 t find answers ( or should i say , reliable answers ) on the internet . 1 . many supplements contain magnesium stearate , which acts as a lubricant to prevent tablet and capsule contents from sticking to the machinery during production . some say it is harmless , some say harmful . what \\u2019 s your take on this issue ? 2 . are \\u201c whole food vitamins \\u201d really better than \\u201c synthetic vitamins \\u201d ? they do sound better , but if you consider how the manufactures would have to do to extract those nutrients from food \\u2026 no one has looked into how those are extracted or made , which might add potential contaminations . am i thinking too much ? 3 . what does \\u201c organic vitamin \\u201d really mean ? if it is synthetic ( looks so to me from reading their labels , if whole food is used , they sure would mention it ) , how can it be \\u201c organic ? thank you ! at a minimum , isn \\u2019 t taking an all natural vegetarian supplement a good way to get some of the trace micro-nutrients into our bodies ? i see many ingredients on the label that i would not get otherwise . i take solgar earth source multi-nutrient tablets ( $ 45 for 180 tablets \\u2026 so , it \\u2019 s like $ 90 / year ( one a day ) and it is full of things that you recommend . granted it is all mixed together.eating whole unrefined plant foods will not result in vitamin deficiencies other then b12 . what vitamins do you think you are missing ? also , you can put in your food for the day and see how much of each nutrient you are getting . i typically surpass recommendations eating only whole plant foods . use this website run by the usda for that purpose. http : / / cronometer.com / it would have been interesting to tease out those people who had the lowest intake of animal products from those with the highest . an argument could be made that vegans , who tend to be low or deficient in b12 , protein , and iron , may have benefitted from multivitamins . in the absence of evidence , its hard to argue with your points , but i \\u2019 d say the jury is still out on this question.vegans are actually not low in protein , this would entail caloric deficiency http : / / nutritionfacts.org / video / do-vegetarians-get-enough-protein / in addition , appropriate iron status is not difficult for most people as long as they consume a diet based on whole , unrefined plant foods . please see here for details on enhancing absorption other then vitamin c. http : / / nutritionfacts.org / video / new-mineral-absorption-enhancers-found / the study you provided suggests that 3 % of people actually are protein deficient , so clearly it can be a problem , as it was for me . i was at 25 grams a day protein because i had dropped most grains as a result of becoming pre-diabetic on an ornish-style diet for 30 years ( despite being slender / fit ) . dropping grains brought my glucose down to normal levels , thankfully , and i \\u2019 ve since learned to add back large quantities of daily tofu and some seitan so i \\u2019 m finally stable re protein.however , it is a very difficult challenge to maintain sufficient b12 or iron on a plant-based diet , so i \\u2019 ve had to add in a multi-vitamin for those . please see this meta-study on b12 deficiency . it outlines how serious the b12 problem may be : http : / / www.nature.com / ejcn / journal / v68 / n5 / full / ejcn201446a.htmlwould if you fast or are deficient in nutrients ? just supplement with the micronutrients ? lifespan , longevity , mortality , multivitamins , nutrition myths , supplements what apparent effect does taking multivitamin supplement have on our lifespan ? i also have these videos on multivitamins : multivitamin supplements and breast cancer - vol 5 should we take a multivitamin ? please feel free to post any ask-the-doctor type questions here in the comments section and i \\u2019 d be happy to try to answer them . and check out the other \\u201c hhh \\u201d videos ( harmful , harmless , or helpful ? ) . also , there are over a thousand subjects covered in the rest of my videos \\u2013 please feel free to explore them as well !\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"entity_count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 35,\n \"min\": 0,\n \"max\": 259,\n \"num_unique_values\": 171,\n \"samples\": [\n 101,\n 55,\n 56\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 19 - } - ] - }, - { - "cell_type": "code", - "source": [ - "df_all.sort_values('entity_count').head(100)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "3AYpXWZIugug", - "outputId": "45c444a6-2244-457f-c244-67783890de34" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " qid query \\\n", - "2 53 Should blood donations be financially compensa... \n", - "39 40 michworks \n", - "96 2180086 what are mormens? \n", - "44 145 vines for shade \n", - "46 47 indexed annuity \n", - ".. ... ... \n", - "16 317 Unsolicited Faxes \n", - "217 218 MedWatch \n", - "283 PLAIN-593 apnea \n", - "20 321 Women in Parliaments \n", - "12 413 steel production \n", - "\n", - " description type \\\n", - "2 NaN NaN \n", - "39 Find information on getting a job in Michigan.... faceted \n", - "96 NaN NaN \n", - "44 \\n information on vines that can be grown i... ambiguous \n", - "46 I'm looking for information about indexed annu... faceted \n", - ".. ... ... \n", - "16 Have regulations been passed by the FCC bannin... NaN \n", - "217 NaN NaN \n", - "283 NaN NaN \n", - "20 Pertinent documents will reflect the fact that... NaN \n", - "12 What are new methods of producing steel? NaN \n", - "\n", - " subtopics \\\n", - "2 NaN \n", - "39 ((1, \\n Take me to the michworks Michigan T... \n", - "96 NaN \n", - "44 ((1, \\n information on vines that can be gr... \n", - "46 ((1, \\n What is an indexed annuity? What a... \n", - ".. ... \n", - "16 NaN \n", - "217 NaN \n", - "283 NaN \n", - "20 NaN \n", - "12 NaN \n", - "\n", - " original_query \\\n", - "2 {'query_id': '53', 'title': 'Should blood dona... \n", - "39 {'query_id': '40', 'query': 'michworks', 'desc... \n", - "96 {'query_id': '2180086', 'text': 'what are morm... \n", - "44 {'query_id': '145', 'query': 'vines for shade'... \n", - "46 {'query_id': '47', 'query': 'indexed annuity',... \n", - ".. ... \n", - "16 {'query_id': '317', 'title': 'Unsolicited Faxe... \n", - "217 {'query_id': '218', 'text': 'MedWatch'} \n", - "283 {'query_id': 'PLAIN-593', 'title': 'apnea', 'a... \n", - "20 {'query_id': '321', 'title': 'Women in Parliam... \n", - "12 {'query_id': '413', 'title': 'steel production... \n", - "\n", - " entities \\\n", - "2 [] \n", - "39 [] \n", - "96 [] \n", - "44 [] \n", - "46 [] \n", - ".. ... \n", - "16 [{'begin': 12, 'end': 17, 'mention': 'faxes', ... \n", - "217 [{'begin': 0, 'end': 8, 'mention': 'medwatch',... \n", - "283 [{'begin': 0, 'end': 5, 'mention': 'apnea', 'u... \n", - "20 [{'begin': 9, 'end': 20, 'mention': 'parliamen... \n", - "12 [{'begin': 0, 'end': 16, 'mention': 'steel pro... \n", - "\n", - " dataset title \\\n", - "2 argsme/2020-04-01/touche-2021-task-1 NaN \n", - "39 clueweb09/en/trec-web-2009 NaN \n", - "96 antique/test NaN \n", - "44 clueweb09/en/trec-web-2011 NaN \n", - "46 clueweb09/en/trec-web-2009 NaN \n", - ".. ... ... \n", - "16 disks45/nocr/trec-robust-2004 Unsolicited Faxes \n", - "217 gov/trec-web-2004 NaN \n", - "283 nfcorpus/test apnea \n", - "20 disks45/nocr/trec-robust-2004 Women in Parliaments \n", - "12 disks45/nocr/trec8 steel production \n", - "\n", - " narrative need context disease \\\n", - "2 NaN NaN NaN NaN \n", - "39 NaN NaN NaN NaN \n", - "96 NaN NaN NaN NaN \n", - "44 NaN NaN NaN NaN \n", - "46 NaN NaN NaN NaN \n", - ".. ... ... ... ... \n", - "16 Relevant documents will provide information on... NaN NaN NaN \n", - "217 NaN NaN NaN NaN \n", - "283 NaN NaN NaN NaN \n", - "20 Pertinent documents relating to this issue wil... NaN NaN NaN \n", - "12 Relevant documents will discuss the processes ... NaN NaN NaN \n", - "\n", - " gene demographic other all \\\n", - "2 NaN NaN NaN NaN \n", - "39 NaN NaN NaN NaN \n", - "96 NaN NaN NaN NaN \n", - "44 NaN NaN NaN NaN \n", - "46 NaN NaN NaN NaN \n", - ".. ... ... ... ... \n", - "16 NaN NaN NaN NaN \n", - "217 NaN NaN NaN NaN \n", - "283 NaN NaN NaN apnea - - infants , milk , sids , dairy , crib... \n", - "20 NaN NaN NaN NaN \n", - "12 NaN NaN NaN NaN \n", - "\n", - " entity_count \n", - "2 0 \n", - "39 0 \n", - "96 0 \n", - "44 0 \n", - "46 0 \n", - ".. ... \n", - "16 1 \n", - "217 1 \n", - "283 1 \n", - "20 1 \n", - "12 1 \n", - "\n", - "[100 rows x 18 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qidquerydescriptiontypesubtopicsoriginal_queryentitiesdatasettitlenarrativeneedcontextdiseasegenedemographicotherallentity_count
253Should blood donations be financially compensa...NaNNaNNaN{'query_id': '53', 'title': 'Should blood dona...[]argsme/2020-04-01/touche-2021-task-1NaNNaNNaNNaNNaNNaNNaNNaNNaN0
3940michworksFind information on getting a job in Michigan....faceted((1, \\n Take me to the michworks Michigan T...{'query_id': '40', 'query': 'michworks', 'desc...[]clueweb09/en/trec-web-2009NaNNaNNaNNaNNaNNaNNaNNaNNaN0
962180086what are mormens?NaNNaNNaN{'query_id': '2180086', 'text': 'what are morm...[]antique/testNaNNaNNaNNaNNaNNaNNaNNaNNaN0
44145vines for shade\\n information on vines that can be grown i...ambiguous((1, \\n information on vines that can be gr...{'query_id': '145', 'query': 'vines for shade'...[]clueweb09/en/trec-web-2011NaNNaNNaNNaNNaNNaNNaNNaNNaN0
4647indexed annuityI'm looking for information about indexed annu...faceted((1, \\n What is an indexed annuity? What a...{'query_id': '47', 'query': 'indexed annuity',...[]clueweb09/en/trec-web-2009NaNNaNNaNNaNNaNNaNNaNNaNNaN0
.........................................................
16317Unsolicited FaxesHave regulations been passed by the FCC bannin...NaNNaN{'query_id': '317', 'title': 'Unsolicited Faxe...[{'begin': 12, 'end': 17, 'mention': 'faxes', ...disks45/nocr/trec-robust-2004Unsolicited FaxesRelevant documents will provide information on...NaNNaNNaNNaNNaNNaNNaN1
217218MedWatchNaNNaNNaN{'query_id': '218', 'text': 'MedWatch'}[{'begin': 0, 'end': 8, 'mention': 'medwatch',...gov/trec-web-2004NaNNaNNaNNaNNaNNaNNaNNaNNaN1
283PLAIN-593apneaNaNNaNNaN{'query_id': 'PLAIN-593', 'title': 'apnea', 'a...[{'begin': 0, 'end': 5, 'mention': 'apnea', 'u...nfcorpus/testapneaNaNNaNNaNNaNNaNNaNNaNapnea - - infants , milk , sids , dairy , crib...1
20321Women in ParliamentsPertinent documents will reflect the fact that...NaNNaN{'query_id': '321', 'title': 'Women in Parliam...[{'begin': 9, 'end': 20, 'mention': 'parliamen...disks45/nocr/trec-robust-2004Women in ParliamentsPertinent documents relating to this issue wil...NaNNaNNaNNaNNaNNaNNaN1
12413steel productionWhat are new methods of producing steel?NaNNaN{'query_id': '413', 'title': 'steel production...[{'begin': 0, 'end': 16, 'mention': 'steel pro...disks45/nocr/trec8steel productionRelevant documents will discuss the processes ...NaNNaNNaNNaNNaNNaNNaN1
\n", - "

100 rows × 18 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "repr_error": "'str' object has no attribute 'empty'" - } - }, - "metadata": {}, - "execution_count": 23 - } - ] - } - ] -} \ No newline at end of file