diff --git a/baseline-retrieval-system/baseline-retrieval-system.ipynb b/baseline-retrieval-system/baseline-retrieval-system.ipynb index e8778d6..c8eed51 100644 --- a/baseline-retrieval-system/baseline-retrieval-system.ipynb +++ b/baseline-retrieval-system/baseline-retrieval-system.ipynb @@ -30,102 +30,149 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: tira in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.0.129)\n", - "Requirement already satisfied: ir-datasets in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.5.7)\n", - "Requirement already satisfied: python-terrier in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.10.1)\n", - "Requirement already satisfied: requests==2.*,>=2.26 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tira) (2.31.0)\n", - "Requirement already satisfied: docker==6.*,>=6.0.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tira) (6.1.3)\n", - "Requirement already satisfied: pandas in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tira) (2.0.2)\n", - "Requirement already satisfied: tqdm in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tira) (4.66.4)\n", - "Requirement already satisfied: packaging>=14.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from docker==6.*,>=6.0.0->tira) (23.2)\n", - "Requirement already satisfied: urllib3>=1.26.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from docker==6.*,>=6.0.0->tira) (2.0.3)\n", - "Requirement already satisfied: websocket-client>=0.32.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from docker==6.*,>=6.0.0->tira) (1.8.0)\n", - "Requirement already satisfied: pywin32>=304 in c:\\users\\michael\\appdata\\roaming\\python\\python311\\site-packages (from docker==6.*,>=6.0.0->tira) (306)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests==2.*,>=2.26->tira) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests==2.*,>=2.26->tira) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests==2.*,>=2.26->tira) (2023.5.7)\n", - "Requirement already satisfied: beautifulsoup4>=4.4.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (4.12.3)\n", - "Requirement already satisfied: inscriptis>=2.2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (2.5.0)\n", - "Requirement already satisfied: lxml>=4.5.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (5.2.2)\n", - "Requirement already satisfied: numpy>=1.18.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (1.24.2)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (6.0.1)\n", - "Requirement already satisfied: trec-car-tools>=2.5.4 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (2.6)\n", - "Requirement already satisfied: lz4>=3.1.10 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (4.3.3)\n", - "Requirement already satisfied: warc3-wet>=0.2.3 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (0.2.3)\n", - "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (0.2.5)\n", - "Requirement already satisfied: zlib-state>=0.1.3 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (0.1.6)\n", - "Requirement already satisfied: ijson>=3.1.3 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (3.2.3)\n", - "Requirement already satisfied: unlzw3>=0.2.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-datasets) (0.2.2)\n", - "Requirement already satisfied: wget in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (3.2)\n", - "Requirement already satisfied: pyjnius>=1.4.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (1.6.1)\n", - "Requirement already satisfied: matchpy in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.5.5)\n", - "Requirement already satisfied: deprecated in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (1.2.14)\n", - "Requirement already satisfied: chest in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.2.3)\n", - "Requirement already satisfied: scipy in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (1.12.0)\n", - "Requirement already satisfied: joblib in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (1.4.2)\n", - "Requirement already satisfied: nptyping==1.4.4 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (1.4.4)\n", - "Requirement already satisfied: more-itertools in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (10.2.0)\n", - "Requirement already satisfied: jinja2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (3.1.2)\n", - "Requirement already satisfied: statsmodels in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.14.2)\n", - "Requirement already satisfied: ir-measures>=0.3.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.3.3)\n", - "Requirement already satisfied: dill in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.3.8)\n", - "Requirement already satisfied: pytrec-eval-terrier>=0.5.3 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from python-terrier) (0.5.6)\n", - "Requirement already satisfied: typish>=1.7.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from nptyping==1.4.4->python-terrier) (1.9.3)\n", - "Requirement already satisfied: soupsieve>1.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from beautifulsoup4>=4.4.1->ir-datasets) (2.5)\n", - "Requirement already satisfied: cwl-eval>=1.0.10 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from ir-measures>=0.3.1->python-terrier) (1.0.12)\n", - "Requirement already satisfied: colorama in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm->tira) (0.4.6)\n", - "Requirement already satisfied: cbor>=1.0.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from trec-car-tools>=2.5.4->ir-datasets) (1.0.0)\n", - "Requirement already satisfied: heapdict in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from chest->python-terrier) (1.0.1)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from deprecated->python-terrier) (1.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jinja2->python-terrier) (2.1.3)\n", - "Requirement already satisfied: multiset<3.0,>=2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matchpy->python-terrier) (2.1.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas->tira) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas->tira) (2023.3)\n", - "Requirement already satisfied: tzdata>=2022.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas->tira) (2023.3)\n", - "Requirement already satisfied: patsy>=0.5.6 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from statsmodels->python-terrier) (0.5.6)\n", - "Requirement already satisfied: six in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from patsy>=0.5.6->statsmodels->python-terrier) (1.16.0)\n", - "Requirement already satisfied: spacy in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (3.7.5)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (3.0.12)\n", - "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (1.0.5)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (1.0.10)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (2.0.8)\n", - "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (3.0.9)\n", - "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (8.2.4)\n", - "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (1.1.3)\n", - "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (2.4.8)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (2.0.10)\n", - "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (0.4.1)\n", - "Requirement already satisfied: typer<1.0.0,>=0.3.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (0.12.3)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (4.66.4)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (2.31.0)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (2.7.4)\n", - "Requirement already satisfied: jinja2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (3.1.2)\n", - "Requirement already satisfied: setuptools in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (65.5.0)\n", - "Requirement already satisfied: packaging>=20.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (23.2)\n", - "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (3.4.0)\n", - "Requirement already satisfied: numpy>=1.19.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from spacy) (1.24.2)\n", - "Requirement already satisfied: language-data>=1.2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.18.4 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.4)\n", - "Requirement already satisfied: typing-extensions>=4.6.1 in c:\\users\\michael\\appdata\\roaming\\python\\python311\\site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.0.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2023.5.7)\n", - "Requirement already satisfied: blis<0.8.0,>=0.7.8 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n", - "Requirement already satisfied: confection<1.0.0,>=0.0.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.5)\n", - "Requirement already satisfied: colorama in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm<5.0.0,>=4.38.0->spacy) (0.4.6)\n", - "Requirement already satisfied: click>=8.0.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.3)\n", - "Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from typer<1.0.0,>=0.3.0->spacy) (1.5.4)\n", - "Requirement already satisfied: rich>=10.11.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from typer<1.0.0,>=0.3.0->spacy) (13.7.1)\n", - "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from weasel<0.5.0,>=0.1.0->spacy) (0.18.1)\n", - "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.0.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jinja2->spacy) (2.1.3)\n", - "Requirement already satisfied: marisa-trie>=0.7.7 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\michael\\appdata\\roaming\\python\\python311\\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (2.18.0)\n", - "Requirement already satisfied: wrapt in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy) (1.16.0)\n", - "Requirement already satisfied: mdurl~=0.1 in c:\\users\\michael\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (0.1.2)\n" + "Requirement already satisfied: tira in /usr/local/lib/python3.10/dist-packages (0.0.134)\n", + "Requirement already satisfied: ir-datasets in /usr/local/lib/python3.10/dist-packages (0.5.5)\n", + "Requirement already satisfied: python-terrier in /usr/local/lib/python3.10/dist-packages (0.10.0)\n", + "Requirement already satisfied: requests==2.*,>=2.26 in /usr/local/lib/python3.10/dist-packages (from tira) (2.31.0)\n", + "Requirement already satisfied: docker==7.*,>=7.1.0 in /usr/local/lib/python3.10/dist-packages (from tira) (7.1.0)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from tira) (2.1.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tira) (4.66.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tira) (23.2)\n", + "Requirement already satisfied: numpy==1.* in /usr/local/lib/python3.10/dist-packages (from tira) (1.26.2)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from docker==7.*,>=7.1.0->tira) (2.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (2023.11.17)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.3.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (6.0.1)\n", + "Requirement already satisfied: lz4>=3.1.10 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.3.2)\n", + "Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.9.3)\n", + "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.5)\n", + "Requirement already satisfied: ijson>=3.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (3.2.3)\n", + "Requirement already satisfied: unlzw3>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.2)\n", + "Requirement already satisfied: warc3-wet>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.3)\n", + "Requirement already satisfied: beautifulsoup4>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.12.2)\n", + "Requirement already satisfied: pyautocorpus>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.12)\n", + "Requirement already satisfied: zlib-state>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.6)\n", + "Requirement already satisfied: inscriptis>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.3.2)\n", + "Requirement already satisfied: trec-car-tools>=2.5.4 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.6)\n", + "Requirement already satisfied: nptyping==1.4.4 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.4.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.11.4)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.7)\n", + "Requirement already satisfied: pyjnius>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.6.1)\n", + "Requirement already satisfied: deprecated in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.2.14)\n", + "Requirement already satisfied: ir-measures>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.1.2)\n", + "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from python-terrier) (10.1.0)\n", + "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.2)\n", + "Requirement already satisfied: chest in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.2.3)\n", + "Requirement already satisfied: statsmodels in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.14.0)\n", + "Requirement already satisfied: matchpy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.5)\n", + "Requirement already satisfied: pytrec-eval-terrier>=0.5.3 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.6)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: typish>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from nptyping==1.4.4->python-terrier) (1.9.3)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4>=4.4.1->ir-datasets) (2.5)\n", + "Requirement already satisfied: cwl-eval>=1.0.10 in /usr/local/lib/python3.10/dist-packages (from ir-measures>=0.3.1->python-terrier) (1.0.12)\n", + "Requirement already satisfied: cbor>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from trec-car-tools>=2.5.4->ir-datasets) (1.0.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.10/dist-packages (from chest->python-terrier) (1.0.1)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated->python-terrier) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->python-terrier) (2.1.3)\n", + "Requirement already satisfied: multiset<3.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from matchpy->python-terrier) (2.1.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2023.3.post1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2023.3)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->python-terrier) (3.2.0)\n", + "Requirement already satisfied: patsy>=0.5.2 in /usr/local/lib/python3.10/dist-packages (from statsmodels->python-terrier) (0.5.4)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.2->statsmodels->python-terrier) (1.16.0)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0mRequirement already satisfied: spacy in /usr/local/lib/python3.10/dist-packages (3.7.5)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.5)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.7.4)\n", + "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.12.3)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.31.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy) (59.6.0)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.12)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.1.3)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.8)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.4.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.1.2)\n", + "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (8.2.5)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (4.66.1)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.10)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.4.8)\n", + "Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.26.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (23.2)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.9)\n", + "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.4.1)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.10)\n", + "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.4)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.8.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.1.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2023.11.17)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.5)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.7)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (13.7.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (1.5.4)\n", + "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.0.4)\n", + "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (0.18.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy) (2.1.3)\n", + "Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (2.17.2)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy) (1.16.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (0.1.2)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0mCollecting en-core-web-md==3.7.1\n", + " Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)\n", + "Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.10/dist-packages (from en-core-web-md==3.7.1) (3.7.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.10)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.10)\n", + "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.4.1)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.7.4)\n", + "Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.26.2)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.31.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (23.2)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.66.1)\n", + "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.2.5)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.9)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (59.6.0)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.1.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.1.2)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.8)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.4.8)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.4.0)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.12)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.5)\n", + "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.12.3)\n", + "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.2.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.18.4)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.8.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.1.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2023.11.17)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.5)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.11)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (13.7.1)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.1.7)\n", + "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.18.1)\n", + "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (7.0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.1.3)\n", + "Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.2.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.17.2)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.16.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.2)\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "You can now load the package via spacy.load('en_core_web_md')\n" ] } ], @@ -133,30 +180,23 @@ "# You only need to execute this cell if you are using Google Golab.\n", "# If you use GitHub Codespaces, everything is already installed.\n", "!pip3 install tira ir-datasets python-terrier\n", - "!pip3 install spacy" + "!pip3 install spacy\n", + "!python3 -m spacy download en_core_web_md" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Michael\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "# Imports\n", "from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run\n", "from tira.rest_api_client import Client\n", "import pyterrier as pt\n", "import spacy \n", - "import pandas as pd\n" + "import pandas as pd\n", + "import en_core_web_md\n" ] }, { @@ -168,8 +208,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "PyTerrier 0.10.1 has loaded Terrier 5.7 (built by craigm on 2022-11-10 18:30) and terrier-helper 0.0.7\n", - "\n" + "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", + "\n", + "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n" ] }, { @@ -2510,14 +2551,23 @@ "Testing word: identification in query: web pages identification\n", " name ndcg_cut_10 recip_rank recall_1000\n", "0 BM25 - Low Entities 0.0 0.038462 0.923077\n", - "Testing word: exhaustivity in query: exhaustivity of index\n" + "Testing word: exhaustivity in query: exhaustivity of index\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.0 0.083333 1.0\n", + "Testing word: of in query: exhaustivity of index\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.0 0.083333 1.0\n", + "Testing word: index in query: exhaustivity of index\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.0 0.083333 1.0\n", + "Testing word: query in query: query optimization\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Michael\\AppData\\Local\\Temp\\ipykernel_2560\\3709681870.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", " similarity = token.similarity(vocab_word)\n" ] }, @@ -2525,15 +2575,6 @@ "name": "stdout", "output_type": "stream", "text": [ - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: of in query: exhaustivity of index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: index in query: exhaustivity of index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: query in query: query optimization\n", " name ndcg_cut_10 recip_rank recall_1000\n", "0 BM25 - Low Entities 0.706544 0.5 0.821429\n", "Testing word: improving in query: query improving\n", @@ -2937,24 +2978,14 @@ "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", "Testing word: logs in query: search engine optimization with query logs\n", " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: bm25 in query: bm25\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.413128 0.5 1.0\n", - "Testing word: somethin in query: somethin makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: why in query: why makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: how in query: how makes natural language processing natural\n" + "0 BM25 - Low Entities 0.343018 0.333333 1.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Michael\\AppData\\Local\\Temp\\ipykernel_2560\\3709681870.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", " similarity = token.similarity(vocab_word)\n" ] }, @@ -2962,6 +2993,16 @@ "name": "stdout", "output_type": "stream", "text": [ + "Testing word: bm25 in query: bm25\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.413128 0.5 1.0\n", + "Testing word: somethin in query: somethin makes natural language processing natural\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", + "Testing word: why in query: why makes natural language processing natural\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", + "Testing word: how in query: how makes natural language processing natural\n", " name ndcg_cut_10 recip_rank recall_1000\n", "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", "Testing word: what in query: what makes natural language processing natural\n", @@ -3716,14 +3757,18 @@ "0 BM25 - Low Entities 0.0 0.033333 0.9\n", "Testing word: lemmatization in query: lemmatization algorithms\n", " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n" + "0 BM25 - Low Entities 0.414123 0.5 1.0\n", + "Testing word: algorithms in query: lemmatization algorithms\n", + " name ndcg_cut_10 recip_rank recall_1000\n", + "0 BM25 - Low Entities 0.414123 0.5 1.0\n", + "Testing word: algorithm in query: lemmatization algorithm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Michael\\AppData\\Local\\Temp\\ipykernel_2560\\3709681870.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", " similarity = token.similarity(vocab_word)\n" ] }, @@ -3731,10 +3776,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "Testing word: algorithms in query: lemmatization algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: algorithm in query: lemmatization algorithm\n", " name ndcg_cut_10 recip_rank recall_1000\n", "0 BM25 - Low Entities 0.414123 0.5 1.0\n", "Testing word: analysis in query: lemmatization analysis\n", @@ -4185,7 +4226,7 @@ ], "source": [ "\n", - "nlp = spacy.load('en_core_web_md')\n", + "nlp = en_core_web_md.load()\n", "\n", "def get_similar_words(word, threshold=0.60):\n", " token = nlp(word)\n", @@ -4296,12 +4337,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "IRDSDataset('ir-lab-sose-2024/ir-acl-anthology-20240504-training')\n", + "IRDSDataset('ir-lab-sose-2024/ir-acl-anthology-20240504-training')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Download: 55.0kiB [00:00, 1.48MiB/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Download finished. Extract...\n", + "Extraction finished: /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/marcel-gohsen\n", "{'qid': '2', 'query': 'machine learning language identification', 'original_query': {'query_id': '2', 'title': 'machine learning language identification', 'description': 'What papers are about machine learning for language identification?', 'narrative': 'Relevant papers include research on methods of machine learning for language identification or how to improve those methods. Papers that focus on other methods for language identification or the usaged of machine learning not for language identification are not relevant.'}, 'entities': [{'begin': 17, 'end': 40, 'mention': 'language identification', 'url': 'https://en.wikipedia.org/wiki/Language_identification', 'score': 1.0}, {'begin': 0, 'end': 16, 'mention': 'machine learning', 'url': 'https://en.wikipedia.org/wiki/Machine_learning', 'score': 0.9745664739884391}, {'begin': 8, 'end': 16, 'mention': 'learning', 'url': 'https://en.wikipedia.org/wiki/Learning', 'score': 0.8932038834951451}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/machine', 'score': 0.597355769230769}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(psychology)', 'score': 0.18333333333333302}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identity_document', 'score': 0.12083333333333302}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(information)', 'score': 0.07916666666666601}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Political_machine', 'score': 0.048076923076923}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(producer)', 'score': 0.042067307692307}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Forensic_identification', 'score': 0.041666666666666005}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(biology)', 'score': 0.0375}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(2017_film)', 'score': 0.033653846153846}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Body_identification', 'score': 0.033333333333333}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Static-X_album)', 'score': 0.032451923076923}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Station_identification', 'score': 0.025}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/System_identification', 'score': 0.020833333333333003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Animal_identification', 'score': 0.020833333333333003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Parameter_identification_problem', 'score': 0.020833333333333003}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(band)', 'score': 0.019230769230769003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identifiability', 'score': 0.016666666666666}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Imagine_Dragons_song)', 'score': 0.015625}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Eyewitness_identification', 'score': 0.0125}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Organizational_identification', 'score': 0.0125}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(EP)', 'score': 0.010817307692307002}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Crack_the_Sky_album)', 'score': 0.008413461538461}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_friend_or_foe', 'score': 0.008333333333333}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(literature)', 'score': 0.008333333333333}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(2006_film)', 'score': 0.0072115384615380005}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(patent)', 'score': 0.004807692307692}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Party_identification', 'score': 0.0041666666666660005}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Gender_identity', 'score': 0.0041666666666660005}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(novel)', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/computer', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Higdon)', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Turing_machine', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Person_of_Interest_(TV_series)#The_Machine', 'score': 0.001201923076923}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Abstract_machine', 'score': 0.001201923076923}]}\n", "{'qid': '1', 'query': 'retrieval system improving effectiveness', 'original_query': {'query_id': '1', 'title': 'retrieval system improving effectiveness', 'description': 'What papers focus on improving the effectiveness of a retrieval system?', 'narrative': 'Relevant papers include research on what makes a retrieval system effective and what improves the effectiveness of a retrieval system. Papers that focus on improving something else or improving the effectiveness of a system that is not a retrieval system are not relevant.'}, 'entities': [{'begin': 27, 'end': 40, 'mention': 'effectiveness', 'url': 'https://en.wikipedia.org/wiki/Effectiveness', 'score': 0.8193548387096771}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/recall_(memory)', 'score': 0.46212121212121204}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Information_retrieval', 'score': 0.204545454545454}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Retrieval', 'score': 0.09848484848484801}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Retrieval_(film)', 'score': 0.07575757575757501}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Data_retrieval', 'score': 0.045454545454545005}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Knowledge_retrieval', 'score': 0.015151515151515001}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Document_retrieval', 'score': 0.007575757575757001}]}\n", "33\n", "35\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] } ], "source": [ @@ -4583,7 +4646,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Now we do the retrieval...\n", + "Now we do the retrieval...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Done. Here are the first 10 entries of the run\n" ] },