Skip to content

Commit

Permalink
Align all parquet BEIR yaml configs with jsonl versions (#2612)
Browse files Browse the repository at this point in the history
{flat, hnsw} x {fp32, int8}: aligned scores, tolerances, etc.
  • Loading branch information
lintool authored Sep 25, 2024
1 parent 3e5a114 commit e0ee832
Show file tree
Hide file tree
Showing 232 changed files with 11,977 additions and 4,408 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BGE-base-en-v1.5 vectors for BEIR (v1.0.0) ArguAna,
# read from Parquet and searched on a flat index built with -quantize.int8.
# Topics come from a .jsonl.gz file via the JsonStringVector reader
# (presumably pre-encoded query vectors, per the "cached" model name).
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -quantize.int8

# Each entry names a metric and the bin/trec_eval invocation that yields it.
# parse_index is presumably the field of the tab-separated trec_eval output
# that holds the score; confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: JsonStringVector
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

# results and tolerance are keyed by the metric names declared above and hold
# one value per topic set. tolerance is presumably the allowed deviation from
# the expected score; confirm how the runner applies it.
models:
  - name: bge-flat-int8-cached
    display: BGE-base-en-v1.5
    type: flat
    params: -hits 1000 -removeQuery -threads 16
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    tolerance:
      nDCG@10:
        - 0.001
      R@100:
        - 0.001
      R@1000:
        - 0.001
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BGE-base-en-v1.5 vectors for BEIR (v1.0.0) ArguAna,
# read from Parquet and searched on a flat index built with -quantize.int8.
# Topics are plain TSV (TsvString reader) and the search params pass
# -encoder BgeBaseEn15 (presumably query encoding at search time via ONNX,
# per the "onnx" model name; confirm).
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -quantize.int8

# Each entry names a metric and the bin/trec_eval invocation that yields it.
# parse_index is presumably the field of the tab-separated trec_eval output
# that holds the score; confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.tsv.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

# results and tolerance are keyed by the metric names declared above and hold
# one value per topic set. tolerance is presumably the allowed deviation from
# the expected score; confirm how the runner applies it.
models:
  - name: bge-flat-int8-onnx
    display: BGE-base-en-v1.5
    type: flat
    params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    tolerance:
      nDCG@10:
        - 0.02
      R@100:
        - 0.03
      R@1000:
        - 0.004
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

Expand All @@ -9,45 +10,44 @@ index_threads: 16
index_options: ""

metrics:
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false

topic_reader: JsonStringVector
topics:
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
- name: bge-flat-cached
display: BGE-base-en-v1.5
type: flat
params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads
16 -hits 1000
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
- name: bge-flat-cached
display: BGE-base-en-v1.5
type: flat
params: -hits 1000 -removeQuery -threads 16
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
Original file line number Diff line number Diff line change
@@ -1,53 +1,60 @@
---
# NOTE(review): this span is a diff-view rendering, not a loadable document.
# Pre-commit and post-commit lines are interleaved without +/- markers and
# all indentation has been lost. Where a stanza appears twice, the first copy
# is presumably the removed text and the second the added text; confirm
# against the repository before relying on either.
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

# Two index_path entries follow: presumably the old value, then its replacement.
index_path: indexes/parquet/arguana
index_path: indexes/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: flat
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: ""

metrics:
# First copy of the three metric entries (presumably the removed lines).
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
# Second copy of the same three entries (presumably the added lines). As
# rendered here the text is identical to the first copy, so the change was
# presumably whitespace-only.
- metric: nDCG@10
command: bin/trec_eval
params: -c -m ndcg_cut.10
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@100
command: bin/trec_eval
params: -c -m recall.100
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false
- metric: R@1000
command: bin/trec_eval
params: -c -m recall.1000
separator: "\t"
parse_index: 2
metric_precision: 4
can_combine: false

topic_reader: TsvString
topics:
# The topic entry also appears twice with identical text (old copy, then new).
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.tsv.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt
- name: "BEIR (v1.0.0): ArguAna"
id: test
path: topics.beir-v1.0.0-arguana.test.tsv.gz
qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
# First copy of the model entry: params span two lines and include
# -generator VectorQueryGenerator -topicField vector; no tolerance section.
- name: bge-flat-onnx
display: BGE-base-en-v1.5
type: flat
params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads
16 -hits 1000 -encoder BgeBaseEn15
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
# Second copy of the model entry: shorter single-line params, the same
# expected scores, and a tolerance section that the first copy lacks.
- name: bge-flat-onnx
display: BGE-base-en-v1.5
type: flat
params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16
results:
nDCG@10:
- 0.6361
R@100:
- 0.9915
R@1000:
- 0.9964
tolerance:
nDCG@10:
- 0.02
R@100:
- 0.02
R@1000:
- 0.004
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
---
# Regression config: BGE-base-en-v1.5 vectors for BEIR (v1.0.0) ArguAna,
# read from Parquet and searched on an HNSW index built with
# -M 16 -efC 100 -quantize.int8. Topics come from a .jsonl.gz file via the
# JsonStringVector reader (presumably pre-encoded query vectors, per the
# "cached" model name).
corpus: beir-v1.0.0-arguana.bge-base-en-v1.5
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet

index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/
index_type: hnsw
collection_class: ParquetDenseVectorCollection
generator_class: ParquetDenseVectorDocumentGenerator
index_threads: 16
index_options: -M 16 -efC 100 -quantize.int8

# Each entry names a metric and the bin/trec_eval invocation that yields it.
# parse_index is presumably the field of the tab-separated trec_eval output
# that holds the score; confirm against the regression runner.
metrics:
  - metric: nDCG@10
    command: bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: JsonStringVector
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

# results and tolerance are keyed by the metric names declared above and hold
# one value per topic set. tolerance is presumably the allowed deviation from
# the expected score; confirm how the runner applies it.
models:
  - name: bge-hnsw-int8-cached
    display: BGE-base-en-v1.5
    type: hnsw
    params: -hits 1000 -efSearch 1000 -removeQuery -threads 16
    results:
      nDCG@10:
        - 0.6361
      R@100:
        - 0.9915
      R@1000:
        - 0.9964
    tolerance:
      nDCG@10:
        - 0.001
      R@100:
        - 0.001
      R@1000:
        - 0.001
Loading

0 comments on commit e0ee832

Please sign in to comment.