Skip to content

Commit

Permalink
fix run
Browse files Browse the repository at this point in the history
  • Loading branch information
ClemensHoerl committed Jun 24, 2024
1 parent 00fb24b commit ca8070a
Show file tree
Hide file tree
Showing 2 changed files with 188,694 additions and 12 deletions.
73 changes: 61 additions & 12 deletions final/Final.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,19 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n",
"\n",
"No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n"
]
}
],
"source": [
"# Imports\n",
"from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run\n",
Expand All @@ -38,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -61,9 +71,38 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ir-benchmarks/antique-test-20230107-training documents: 0%| | 1828/403666 [00:02<03:26, 1943.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14:24:05.530 [ForkJoinPool-1-worker-3] WARN org.terrier.structures.indexing.Indexer - Adding an empty document to the index (2824443_2) - further warnings are suppressed\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"ir-benchmarks/antique-test-20230107-training documents: 100%|██████████| 403666/403666 [00:41<00:00, 9720.29it/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14:24:49.744 [ForkJoinPool-1-worker-3] WARN org.terrier.structures.indexing.Indexer - Indexed 1570 empty documents\n"
]
}
],
"source": [
"def create_index(documents, stopwords):\n",
" indexer = pt.IterDictIndexer(\"/tmp/index\", overwrite=True, meta={'docno': 100, 'text': 20480}, stopwords=stopwords)\n",
Expand Down Expand Up @@ -99,7 +138,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -108,7 +147,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -130,7 +169,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -146,7 +185,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -198,7 +237,7 @@
"0 0.928343 "
]
},
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -215,10 +254,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The run file is normalized outside the TIRA sandbox, I will store it at \"../runs\".\n",
"Done. run file is stored under \"../runs/run.txt\".\n"
]
}
],
"source": [
"run = bm25(pt_dataset.get_topics('text'))\n",
"persist_and_normalize_run(run, system_name='bm25-baseline', default_output='../runs')"
]
}
Expand Down
Loading

0 comments on commit ca8070a

Please sign in to comment.