Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align all parquet BEIR yaml configs with jsonl versions #2612

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BEIR (v1.0.0) ArguAna with BGE-base-en-v1.5 vectors.
# The corpus is read from a Parquet file and indexed as a flat index with
# int8 quantization; queries are read with the JsonStringVector topic reader.
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -quantize.int8

# Each metric is computed by running `command` with `params`.
# NOTE(review): separator/parse_index presumably select the score column of
# the trec_eval output line - confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: JsonStringVector
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
  - name: bge-flat-int8-cached
    display: BGE-base-en-v1.5
    type: flat
    params: -hits 1000 -removeQuery -threads 16
    # One expected score per entry in `topics`, keyed by metric name.
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    # NOTE(review): presumably the allowed deviation from `results` for each
    # metric - confirm against the regression runner.
    tolerance:
      nDCG@10:
        - 0.001
      R@100:
        - 0.001
      R@1000:
        - 0.001
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BEIR (v1.0.0) ArguAna with BGE-base-en-v1.5 vectors.
# The corpus is read from a Parquet file and indexed as a flat index with
# int8 quantization; queries are plain TSV topics and the search command is
# given `-encoder BgeBaseEn15`.
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -quantize.int8

# Each metric is computed by running `command` with `params`.
# NOTE(review): separator/parse_index presumably select the score column of
# the trec_eval output line - confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.tsv.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
  - name: bge-flat-int8-onnx
    display: BGE-base-en-v1.5
    type: flat
    params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16
    # One expected score per entry in `topics`, keyed by metric name.
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    # NOTE(review): presumably the allowed deviation from `results` for each
    # metric - confirm against the regression runner. These values are looser
    # than the 0.001 used by the cached-query int8 config.
    tolerance:
      nDCG@10:
        - 0.02
      R@100:
        - 0.03
      R@1000:
        - 0.004
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE(review): this span is a diff view of an existing config, not a complete
# file. The +/- markers and the original indentation are missing, and some
# lines are hidden behind the "Expand All" marker below. Repeated entries are
# presumably the removed version followed by the added version - confirm
# against the pull request before reusing any of this as a config.
---
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

Expand All @@ -9,45 +10,44 @@ index_threads: 16
index_options: ""

metrics:
# NOTE(review): the three metric entries appear twice with identical text;
# presumably a whitespace-only (indentation) change - confirm.
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false

topic_reader: JsonStringVector
topics:
# NOTE(review): the topic entry appears twice with identical text; presumably
# the old and new sides of a whitespace-only change - confirm.
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
# NOTE(review): two entries share the name bge-flat-cached. The first passes
# `-generator VectorQueryGenerator -topicField vector` in params and the second
# does not; presumably the first is the removed version - confirm.
- name: bge-flat-cached
display: BGE-base-en-v1.5
type: flat
params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads
16 -hits 1000
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
- name: bge-flat-cached
display: BGE-base-en-v1.5
type: flat
params: -hits 1000 -removeQuery -threads 16
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
Original file line number Diff line number Diff line change
@@ -1,53 +1,60 @@
# NOTE(review): this span is a diff view of an existing config (the preceding
# hunk header reads -1,53 +1,60), not a single file. The +/- markers and the
# original indentation are missing. Repeated entries are presumably the
# removed version followed by the added version - confirm against the pull
# request before reusing any of this as a config.
---
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

# NOTE(review): index_path is listed twice with different values; presumably
# the first is the old path and the second its replacement - confirm.
index_path: indexes/parquet/arguana
index_path: indexes/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: ""

metrics:
# NOTE(review): the three metric entries appear twice with identical text;
# presumably a whitespace-only (indentation) change - confirm.
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false

topic_reader: TsvString
topics:
# NOTE(review): the topic entry appears twice with identical text; presumably
# the old and new sides of a whitespace-only change - confirm.
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.tsv.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.tsv.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
# NOTE(review): two entries share the name bge-flat-onnx. The first passes
# `-generator VectorQueryGenerator -topicField vector` in params and has no
# tolerance section; the second drops those flags and is followed by a
# tolerance section. Presumably the first is the removed version - confirm.
- name: bge-flat-onnx
display: BGE-base-en-v1.5
type: flat
params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads
16 -hits 1000 -encoder BgeBaseEn15
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
- name: bge-flat-onnx
display: BGE-base-en-v1.5
type: flat
params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
tolerance:
nDCG@10:
- 0.02
R@100:
- 0.02
R@1000:
- 0.004
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BEIR (v1.0.0) ArguAna with BGE-base-en-v1.5 vectors.
# The corpus is read from a Parquet file and indexed as an HNSW index
# (-M 16 -efC 100) with int8 quantization; queries are read with the
# JsonStringVector topic reader.
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: hnsw
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -M 16 -efC 100 -quantize.int8

# Each metric is computed by running `command` with `params`.
# NOTE(review): separator/parse_index presumably select the score column of
# the trec_eval output line - confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: JsonStringVector
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
  - name: bge-hnsw-int8-cached
    display: BGE-base-en-v1.5
    type: hnsw
    params: -hits 1000 -efSearch 1000 -removeQuery -threads 16
    # One expected score per entry in `topics`, keyed by metric name.
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    # NOTE(review): presumably the allowed deviation from `results` for each
    # metric - confirm against the regression runner.
    tolerance:
      nDCG@10:
        - 0.001
      R@100:
        - 0.001
      R@1000:
        - 0.001
Loading
Loading