From 3fae60cf858701887e3e8c43bff854c53ebd5da7 Mon Sep 17 00:00:00 2001 From: lintool Date: Mon, 23 Sep 2024 18:35:52 -0400 Subject: [PATCH 1/4] Tweak parquet BEIR yaml configs --- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...na.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...sq.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...er.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...id.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...sh.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ng.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...is.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ca.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...cs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...rs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ts.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ex.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ix.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- 
....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...rs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ss.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ty.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...er.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...qa.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...qa.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...us.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...nq.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ra.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...04.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...cs.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ct.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...1m.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...id.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- 
....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...ws.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- ....bge-base-en-v1.5.parquet.flat.cached.yaml | 74 ++++++++--------- ...20.bge-base-en-v1.5.parquet.flat.onnx.yaml | 83 ++++++++++--------- 58 files changed, 2378 insertions(+), 2175 deletions(-) diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml index dc5832bfd..0ad5d23dd 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): ArguAna" - id: test - path: 
topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-arguana.test.txt + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.6361 - R@100: - - 0.9915 - R@1000: - - 0.9964 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml index 8aa82ce2b..f7cd3838e 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet -index_path: indexes/parquet/arguana +index_path: indexes/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: 
nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): ArguAna" - id: test - path: topics.beir-v1.0.0-arguana.test.tsv.gz - qrel: qrels.beir-v1.0.0-arguana.test.txt + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.6361 - R@100: - - 0.9915 - R@1000: - - 0.9964 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml index 1d969a082..1a384c156 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - 
parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): BioASQ" - id: test - path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-bioasq.test.txt + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4149 - R@100: - - 0.6317 - R@1000: - - 0.8059 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml index df8e805e4..3fd42f958 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ 
-1,7 +1,8 @@ +--- corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet -index_path: indexes/parquet/bioasq +index_path: indexes/lucene-flat.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): BioASQ" - id: test - path: topics.beir-v1.0.0-bioasq.test.tsv.gz - qrel: qrels.beir-v1.0.0-bioasq.test.txt + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4149 - R@100: - - 0.6317 - R@1000: - - 0.8059 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery 
-threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.0002 + R@100: + - 0.0002 + R@1000: + - 0.0004 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml index 267024222..f7bdfe008 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Climate-FEVER" - id: test - path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-climate-fever.test.txt + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: 
topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3119 - R@100: - - 0.6362 - R@1000: - - 0.8307 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml index f20163e22..04aa15914 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet -index_path: indexes/parquet/climate-fever +index_path: indexes/lucene-flat.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Climate-FEVER" - id: test - path: topics.beir-v1.0.0-climate-fever.test.tsv.gz - qrel: qrels.beir-v1.0.0-climate-fever.test.txt + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3119 - R@100: - - 0.6362 - R@1000: - - 0.8307 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.0003 + R@100: + - 0.0009 + R@1000: + - 0.0002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml index 913fe0c8b..7e83e76cf 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 
- separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-android" - id: test - path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.5075 - R@100: - - 0.8454 - R@1000: - - 0.9611 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml index a233ece80..612b807d2 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet -index_path: indexes/parquet/cqadupstack-android +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-android" - id: test - path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: -- name: bge-flat-onnx - display: 
BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5075 - R@100: - - 0.8454 - R@1000: - - 0.9611 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 + tolerance: + nDCG@10: + - 0.0002 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml index b026e0364..4789018e3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + 
params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-english" - id: test - path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4857 - R@100: - - 0.7587 - R@1000: - - 0.8839 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml index 20475994e..533c9d4ec 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet -index_path: indexes/parquet/cqadupstack-english +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - 
parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-english" - id: test - path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4857 - R@100: - - 0.7587 - R@1000: - - 0.8839 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0002 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml index 89cca2971..618a6a418 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-gaming" - id: test - path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.5965 - R@100: - - 0.9036 - R@1000: - - 0.9719 + - 
name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml index 1c62465c0..b86c2ee23 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet -index_path: indexes/parquet/cqadupstack-gaming +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-gaming" - id: test - path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5965 - R@100: - - 0.9036 - R@1000: - - 0.9719 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.0003 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml index a503c1787..ceb25d8de 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" 
- parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-gis" - id: test - path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4127 - R@100: - - 0.7682 - R@1000: - - 0.9117 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml index 5a70b06db..a45eb8a83 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet -index_path: indexes/parquet/cqadupstack-gis 
+index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-gis" - id: test - path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4127 - R@100: - - 0.7682 - R@1000: - - 0.9117 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + 
nDCG@10: + - 0.0005 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml index a65ad90c0..6a2f8662b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-mathematica" - id: test - path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3163 - R@100: - - 0.6922 - R@1000: - - 0.8810 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml index 60b7d7a50..617b3229b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet -index_path: indexes/parquet/cqadupstack-mathematica +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: 
nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-mathematica" - id: test - path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3163 - R@100: - - 0.6922 - R@1000: - - 0.8810 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml index 0dc97cad2..aaa268b65 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-physics" - id: test - path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4722 - R@100: - - 0.8081 - R@1000: - - 0.9406 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml index f61588dfa..20ce95ae4 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet -index_path: indexes/parquet/cqadupstack-physics +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-physics" - id: test - path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4722 - R@100: - - 0.8081 - R@1000: - - 0.9406 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.0003 + R@100: + - 0.0004 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml index 77477da46..a4db97ed3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + 
command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-programmers" - id: test - path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4242 - R@100: - - 0.7856 - R@1000: - - 0.9348 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml index ec1cfbd7b..3a6114ed5 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet -index_path: indexes/parquet/cqadupstack-programmers +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: flat collection_class: 
ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-programmers" - id: test - path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4242 - R@100: - - 0.7856 - R@1000: - - 0.9348 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.0005 + R@100: + - 0.0001 + R@1000: + - 0.0006 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml index 501f90a88..bfdae8ac4 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-stats" - id: test - path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: -- name: 
bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3732 - R@100: - - 0.6727 - R@1000: - - 0.8445 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml index cee72e909..4a06fd3cc 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet -index_path: indexes/parquet/cqadupstack-stats +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m 
recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-stats" - id: test - path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3732 - R@100: - - 0.6727 - R@1000: - - 0.8445 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.0005 + R@100: + - 0.0009 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml index ef840a196..3e6f6b532 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - 
command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-tex" - id: test - path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3115 - R@100: - - 0.6486 - R@1000: - - 0.8537 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml index 00f2696cc..f77ea37b4 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet -index_path: indexes/parquet/cqadupstack-tex +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-tex" - id: test - path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - 
results: - nDCG@10: - - 0.3115 - R@100: - - 0.6486 - R@1000: - - 0.8537 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml index e9ee1f42d..dd5129723 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): 
CQADupStack-unix" - id: test - path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4219 - R@100: - - 0.7797 - R@1000: - - 0.9237 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml index 04e7c1a67..71684609c 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet -index_path: indexes/parquet/cqadupstack-unix +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: 
R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-unix" - id: test - path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4219 - R@100: - - 0.7797 - R@1000: - - 0.9237 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml index 69af15587..509f451ee 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ 
+--- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-webmasters" - id: test - path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7774 - R@1000: - - 0.9380 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml index a84c77ec9..d9ba76351 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet -index_path: indexes/parquet/cqadupstack-webmasters +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-webmasters" - id: test - path: 
topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7774 - R@1000: - - 0.9380 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml index 696407196..38efb43e7 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false 
+ - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-wordpress" - id: test - path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3547 - R@100: - - 0.7065 - R@1000: - - 0.8861 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml index 99ee62e33..8ba6c3247 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet -index_path: 
indexes/parquet/cqadupstack-wordpress +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-wordpress" - id: test - path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3547 - R@100: - - 0.7065 - R@1000: - - 0.8861 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + 
nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.002 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml index 99983fbd6..29b8976a4 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): DBPedia" - id: test - path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: 
topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4074 - R@100: - - 0.5303 - R@1000: - - 0.7833 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml index fab4ba681..9068e824f 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet -index_path: indexes/parquet/dbpedia-entity +index_path: indexes/lucene-flat.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): DBPedia" - id: test - path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz - qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4074 - R@100: - - 0.5303 - R@1000: - - 0.7833 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.0002 + R@100: + - 0.0006 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml index 62cf09c15..ac9beb453 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: 
false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): FEVER" - id: test - path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-fever.test.txt + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.8630 - R@100: - - 0.9719 - R@1000: - - 0.9855 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml index c9b0bbb41..75ac91f3c 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet -index_path: indexes/parquet/fever +index_path: indexes/lucene-flat.beir-v1.0.0-fever.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): FEVER" - id: test - path: topics.beir-v1.0.0-fever.test.tsv.gz - qrel: qrels.beir-v1.0.0-fever.test.txt + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.8630 - R@100: - - 0.9719 - R@1000: - - 0.9855 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: 
+ - 0.9855 + tolerance: + nDCG@10: + - 0.0002 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml index 37e53a4c0..cf1339329 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): FiQA-2018" - id: test - path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-fiqa.test.txt + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - 
type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7415 - R@1000: - - 0.9083 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml index ae8e00c96..aa74b3119 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet -index_path: indexes/parquet/fiqa +index_path: indexes/lucene-flat.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m 
recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): FiQA-2018" - id: test - path: topics.beir-v1.0.0-fiqa.test.tsv.gz - qrel: qrels.beir-v1.0.0-fiqa.test.txt + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7415 - R@1000: - - 0.9083 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml index 75a7e9be0..9417d7105 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - 
can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): HotpotQA" - id: test - path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.7259 - R@100: - - 0.8727 - R@1000: - - 0.9424 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml index 629ecf212..5a65b1b8b 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet -index_path: indexes/parquet/hotpotqa +index_path: indexes/lucene-flat.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection 
generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): HotpotQA" - id: test - path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz - qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7259 - R@100: - - 0.8727 - R@1000: - - 0.9424 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0002 + R@1000: + - 0.0002 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml index e57ec5912..cc5f7e5e6 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): NFCorpus" - id: test - path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.3735 - R@100: - - 
0.3368 - R@1000: - - 0.6622 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml index ef1780348..e84bd0f7b 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet -index_path: indexes/parquet/nfcorpus +index_path: indexes/lucene-flat.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- 
name: "BEIR (v1.0.0): NFCorpus" - id: test - path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz - qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3735 - R@100: - - 0.3368 - R@1000: - - 0.6622 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml index ce7cb88b6..524719eb8 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-nq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): NQ" - id: test - path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-nq.test.txt + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.5413 - R@100: - - 0.9415 - R@1000: - - 0.9859 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml index 8dc50a42d..35d3ce694 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-nq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet -index_path: indexes/parquet/nq +index_path: indexes/lucene-flat.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - 
metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): NQ" - id: test - path: topics.beir-v1.0.0-nq.test.tsv.gz - qrel: qrels.beir-v1.0.0-nq.test.txt + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5413 - R@100: - - 0.9415 - R@1000: - - 0.9859 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0002 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml index c47b4a652..dc7d45cec 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Quora" - id: test - path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-quora.test.txt + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.8890 - R@100: - - 0.9967 - R@1000: - - 0.9998 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 diff --git 
a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml index ac5a75722..b0cb44683 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet -index_path: indexes/parquet/quora +index_path: indexes/lucene-flat.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Quora" - id: test - path: topics.beir-v1.0.0-quora.test.tsv.gz - qrel: qrels.beir-v1.0.0-quora.test.txt + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: 
qrels.beir-v1.0.0-quora.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.8890 - R@100: - - 0.9967 - R@1000: - - 0.9998 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml index 7ba7a0584..4e9749ef7 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: 
bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Robust04" - id: test - path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-robust04.test.txt + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4465 - R@100: - - 0.3507 - R@1000: - - 0.5981 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml index 37dcf3b83..cd190ed2a 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet -index_path: indexes/parquet/robust04 +index_path: indexes/lucene-flat.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" 
- parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Robust04" - id: test - path: topics.beir-v1.0.0-robust04.test.tsv.gz - qrel: qrels.beir-v1.0.0-robust04.test.txt + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4465 - R@100: - - 0.3507 - R@1000: - - 0.5981 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.0001 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml index 0f03cfbf2..f7ea0c499 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: 
beir-v1.0.0-scidocs.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): SCIDOCS" - id: test - path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-scidocs.test.txt + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.2170 - R@100: - - 0.4959 - R@1000: - - 0.7824 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml index 953d845c4..60d563f0f 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet -index_path: indexes/parquet/scidocs +index_path: indexes/lucene-flat.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): SCIDOCS" - id: test - path: topics.beir-v1.0.0-scidocs.test.tsv.gz - qrel: qrels.beir-v1.0.0-scidocs.test.txt + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 
- type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2170 - R@100: - - 0.4959 - R@1000: - - 0.7824 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml index 667f7ed98..e381363c0 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): SciFact" - id: test - path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-scifact.test.txt + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery - -threads 16 -hits 1000 - results: - nDCG@10: - - 0.7408 - R@100: - - 0.9667 - R@1000: - - 0.9967 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml index 582e01ae3..bb86b1f6c 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet -index_path: indexes/parquet/scifact +index_path: indexes/lucene-flat.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval 
- params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): SciFact" - id: test - path: topics.beir-v1.0.0-scifact.test.tsv.gz - qrel: qrels.beir-v1.0.0-scifact.test.txt + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery - -threads 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7408 - R@100: - - 0.9667 - R@1000: - - 0.9967 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml index 6def8c01f..dcc96fbaf 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet @@ -9,45 
+10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Signal-1M" - id: test - path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-signal1m.test.txt + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.2886 - R@100: - - 0.3112 - R@1000: - - 0.5331 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml index fa3a186d4..93377d4a9 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet -index_path: indexes/parquet/signal1m +index_path: indexes/lucene-flat.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Signal-1M" - id: test - path: topics.beir-v1.0.0-signal1m.test.tsv.gz - qrel: qrels.beir-v1.0.0-signal1m.test.txt + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 
-encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2886 - R@100: - - 0.3112 - R@1000: - - 0.5331 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml index 0579a55d5..37ae01eb3 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): TREC-COVID" - 
id: test - path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-trec-covid.test.txt + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.7814 - R@100: - - 0.1406 - R@1000: - - 0.4768 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml index e1ec6e4f8..d80eb3594 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet -index_path: indexes/parquet/trec-covid +index_path: indexes/lucene-flat.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - 
metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): TREC-COVID" - id: test - path: topics.beir-v1.0.0-trec-covid.test.tsv.gz - qrel: qrels.beir-v1.0.0-trec-covid.test.txt + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7814 - R@100: - - 0.1406 - R@1000: - - 0.4768 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0004 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml index 4568c23a8..22a3dc387 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: 
-- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): TREC-NEWS" - id: test - path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-trec-news.test.txt + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.4425 - R@100: - - 0.4992 - R@1000: - - 0.7875 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml index 0ba98ad01..06069808a 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet -index_path: indexes/parquet/trec-news +index_path: indexes/lucene-flat.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): TREC-NEWS" - id: test - path: topics.beir-v1.0.0-trec-news.test.tsv.gz - qrel: qrels.beir-v1.0.0-trec-news.test.txt + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 
-hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4425 - R@100: - - 0.4992 - R@1000: - - 0.7875 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.0002 + R@100: + - 0.0001 + R@1000: + - 0.0001 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml index 0081a05b0..313fd09ff 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,3 +1,4 @@ +--- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet @@ -9,45 +10,44 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector 
topics: -- name: "BEIR (v1.0.0): Webis-Touche2020" - id: test - path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt models: -- name: bge-flat-cached - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 - results: - nDCG@10: - - 0.2570 - R@100: - - 0.4857 - R@1000: - - 0.8298 + - name: bge-flat-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml index e1f820509..a7dbce5ea 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.onnx.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet -index_path: indexes/parquet/webis-touche2020 +index_path: indexes/lucene-flat.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: "" metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 
- can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Webis-Touche2020" - id: test - path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz - qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt models: -- name: bge-flat-onnx - display: BGE-base-en-v1.5 - type: flat - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2570 - R@100: - - 0.4857 - R@1000: - - 0.8298 + - name: bge-flat-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.0001 + R@100: + - 0.0001 + R@1000: + - 0.0001 From e5ea64cbcbdb8d463b8ff3c1e0438a4a4c114b09 Mon Sep 17 00:00:00 2001 From: lintool Date: Mon, 23 Sep 2024 19:30:19 -0400 Subject: [PATCH 2/4] Tweak HNSW. 
--- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...na.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...sq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...er.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...id.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...sh.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ng.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...is.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ca.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...cs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...rs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ts.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...rs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- 
....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ss.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ty.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...er.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...qa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...qa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...us.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ra.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...cs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ct.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...id.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- ....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...ws.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- 
....bge-base-en-v1.5.parquet.hnsw.cached.yaml | 83 +++++++++--------- ...20.bge-base-en-v1.5.parquet.hnsw.onnx.yaml | 85 ++++++++++--------- 58 files changed, 2639 insertions(+), 2233 deletions(-) diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml index a89ca55d1..d365de211 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet -index_path: indexes/parquet/arguana +index_path: indexes/lucene-hnsw.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- 
name: "BEIR (v1.0.0): ArguAna" - id: test - path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-arguana.test.txt + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.6361 - R@100: - - 0.9915 - R@1000: - - 0.9964 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 95710334b..afd5f7b8f 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet -index_path: indexes/parquet/arguana +index_path: indexes/lucene-hnsw.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 
- metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): ArguAna" - id: test - path: topics.beir-v1.0.0-arguana.test.tsv.gz - qrel: qrels.beir-v1.0.0-arguana.test.txt + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.6361 - R@100: - - 0.9915 - R@1000: - - 0.9964 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 4d13f0fc9..e40a7721d 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: 
beir-v1.0.0-bioasq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet -index_path: indexes/parquet/bioasq +index_path: indexes/lucene-hnsw.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): BioASQ" - id: test - path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-bioasq.test.txt + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 5000 - results: - nDCG@10: - - 0.4149 - R@100: - - 0.6317 - R@1000: - - 0.8059 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 
-efSearch 2000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.03 + R@1000: + - 0.04 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 4b12f744c..c1f0454f6 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet -index_path: indexes/parquet/bioasq +index_path: indexes/lucene-hnsw.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 500 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR 
(v1.0.0): BioASQ" - id: test - path: topics.beir-v1.0.0-bioasq.test.tsv.gz - qrel: qrels.beir-v1.0.0-bioasq.test.txt + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 5000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4149 - R@100: - - 0.6317 - R@1000: - - 0.8059 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 2000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.03 + R@1000: + - 0.04 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 39361e212..85aa8d0f9 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet -index_path: indexes/parquet/climate-fever +index_path: indexes/lucene-hnsw.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - 
metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Climate-FEVER" - id: test - path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-climate-fever.test.txt + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3119 - R@100: - - 0.6362 - R@1000: - - 0.8307 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index c770ad1a5..0849f067b 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet -index_path: indexes/parquet/climate-fever +index_path: indexes/lucene-hnsw.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Climate-FEVER" - id: test - path: topics.beir-v1.0.0-climate-fever.test.tsv.gz - qrel: qrels.beir-v1.0.0-climate-fever.test.txt + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 
-hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3119 - R@100: - - 0.6362 - R@1000: - - 0.8307 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml index e65786088..12a54c978 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet -index_path: indexes/parquet/cqadupstack-android +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + 
command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-android" - id: test - path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.5075 - R@100: - - 0.8454 - R@1000: - - 0.9611 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 6b5e1ceaa..6724a360c 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet -index_path: indexes/parquet/cqadupstack-android +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ 
index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-android" - id: test - path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5075 - R@100: - - 0.8454 - R@1000: - - 0.9611 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 
0.9611 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml index ae630e78f..dec03bbde 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet -index_path: indexes/parquet/cqadupstack-english +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR 
(v1.0.0): CQADupStack-english" - id: test - path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4857 - R@100: - - 0.7587 - R@1000: - - 0.8839 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 0e798089b..e5bcf5a05 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet -index_path: indexes/parquet/cqadupstack-english +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - 
parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-english" - id: test - path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4857 - R@100: - - 0.7587 - R@1000: - - 0.8839 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 0542cdae6..068193b47 
100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet -index_path: indexes/parquet/cqadupstack-gaming +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-gaming" - id: test - path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: 
qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.5965 - R@100: - - 0.9036 - R@1000: - - 0.9719 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 95e0667c4..12104e64d 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet -index_path: indexes/parquet/cqadupstack-gaming +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - 
can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-gaming" - id: test - path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5965 - R@100: - - 0.9036 - R@1000: - - 0.9719 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 2a67b5b2e..0a4d0e5e3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet -index_path: indexes/parquet/cqadupstack-gis +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-gis" - id: test - path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4127 - R@100: - - 0.7682 - R@1000: - - 0.9117 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: 
hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index f60854cee..476721a3b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet -index_path: indexes/parquet/cqadupstack-gis +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-gis" - id: test - path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4127 - R@100: - - 0.7682 - R@1000: - - 0.9117 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml index c8ef4336b..8cf32eee4 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet -index_path: indexes/parquet/cqadupstack-mathematica +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- 
metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-mathematica" - id: test - path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3163 - R@100: - - 0.6922 - R@1000: - - 0.8810 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 1445a34e2..76beffc70 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet -index_path: indexes/parquet/cqadupstack-mathematica +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-mathematica" - id: test - path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz - qrel: 
qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3163 - R@100: - - 0.6922 - R@1000: - - 0.8810 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml index d119cf03a..686812b2c 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet -index_path: indexes/parquet/cqadupstack-physics +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - 
separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-physics" - id: test - path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4722 - R@100: - - 0.8081 - R@1000: - - 0.9406 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 884097cc7..0bdcb639a 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet -index_path: indexes/parquet/cqadupstack-physics +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-physics" - id: test - path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator 
VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4722 - R@100: - - 0.8081 - R@1000: - - 0.9406 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 138fb8efd..5b1831541 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet -index_path: indexes/parquet/cqadupstack-programmers +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-programmers" - id: test - path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4242 - R@100: - - 0.7856 - R@1000: - - 0.9348 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index ae70952f3..8476626ae 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet -index_path: indexes/parquet/cqadupstack-programmers +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-programmers" - id: test - path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4242 - R@100: - - 
0.7856 - R@1000: - - 0.9348 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml index f9602e3dc..ec94eee26 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet -index_path: indexes/parquet/cqadupstack-stats +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-stats" - id: test - path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3732 - R@100: - - 0.6727 - R@1000: - - 0.8445 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 1cb01597f..b41f478a2 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet -index_path: indexes/parquet/cqadupstack-stats +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 
-index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-stats" - id: test - path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3732 - R@100: - - 0.6727 - R@1000: - - 0.8445 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.005 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml index e253333f9..ae55d3dd9 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet -index_path: indexes/parquet/cqadupstack-tex +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-tex" - id: test - path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz - qrel: 
qrels.beir-v1.0.0-cqadupstack-tex.test.txt + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3115 - R@100: - - 0.6486 - R@1000: - - 0.8537 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 725d8f8ee..e97051cb3 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet -index_path: indexes/parquet/cqadupstack-tex +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 
4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-tex" - id: test - path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3115 - R@100: - - 0.6486 - R@1000: - - 0.8537 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 7f9d3fadf..3af11991d 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet -index_path: indexes/parquet/cqadupstack-unix +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-unix" - id: test - path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator 
-topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4219 - R@100: - - 0.7797 - R@1000: - - 0.9237 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 55beec0f1..1d80deddf 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet -index_path: indexes/parquet/cqadupstack-unix +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - 
metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-unix" - id: test - path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4219 - R@100: - - 0.7797 - R@1000: - - 0.9237 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml index a2b59eb90..b51d7793b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet -index_path: indexes/parquet/cqadupstack-webmasters +index_path: 
indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-webmasters" - id: test - path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7774 - R@1000: - - 0.9380 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: 
+ nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index a13e32715..4ba696841 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet -index_path: indexes/parquet/cqadupstack-webmasters +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-webmasters" - id: test - path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz - qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7774 - R@1000: - - 0.9380 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 888acad0a..4dab99dba 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet -index_path: indexes/parquet/cqadupstack-wordpress +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 
metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): CQADupStack-wordpress" - id: test - path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3547 - R@100: - - 0.7065 - R@1000: - - 0.8861 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 1326c2a8e..9df2c5d61 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet -index_path: indexes/parquet/cqadupstack-wordpress +index_path: indexes/lucene-hnsw.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): CQADupStack-wordpress" - id: test - path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz - qrel: 
qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3547 - R@100: - - 0.7065 - R@1000: - - 0.8861 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.004 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 1b789cb5c..efe50654b 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet -index_path: indexes/parquet/dbpedia-entity +index_path: indexes/lucene-hnsw.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 
- can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): DBPedia" - id: test - path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4074 - R@100: - - 0.5303 - R@1000: - - 0.7833 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.008 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 2e16038e4..94a922fca 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ 
+--- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet -index_path: indexes/parquet/dbpedia-entity +index_path: indexes/lucene-hnsw.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): DBPedia" - id: test - path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz - qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4074 - R@100: - - 0.5303 - R@1000: - - 0.7833 + 
- name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.008 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 29c01d6b4..2e892e5fe 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet -index_path: indexes/parquet/fever +index_path: indexes/lucene-hnsw.beir-v1.0.0-fever.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): FEVER" - id: test - path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-fever.test.txt + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.8630 - R@100: - - 0.9719 - R@1000: - - 0.9855 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.015 + R@1000: + - 0.015 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 1339bfd65..e372acb31 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet -index_path: indexes/parquet/fever +index_path: indexes/lucene-hnsw.beir-v1.0.0-fever.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - 
params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): FEVER" - id: test - path: topics.beir-v1.0.0-fever.test.tsv.gz - qrel: qrels.beir-v1.0.0-fever.test.txt + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.8630 - R@100: - - 0.9719 - R@1000: - - 0.9855 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.015 + R@1000: + - 0.015 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 421a0bdf4..35e32eddb 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ 
+--- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet -index_path: indexes/parquet/fiqa +index_path: indexes/lucene-hnsw.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): FiQA-2018" - id: test - path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-fiqa.test.txt + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7415 - R@1000: - - 0.9083 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 
-efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.004 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index a8755f7cb..f4540dca9 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet -index_path: indexes/parquet/fiqa +index_path: indexes/lucene-hnsw.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- 
name: "BEIR (v1.0.0): FiQA-2018" - id: test - path: topics.beir-v1.0.0-fiqa.test.tsv.gz - qrel: qrels.beir-v1.0.0-fiqa.test.txt + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4065 - R@100: - - 0.7415 - R@1000: - - 0.9083 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.004 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml index a4a906ffd..e888c067e 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet -index_path: indexes/parquet/hotpotqa +index_path: indexes/lucene-hnsw.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: 
false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): HotpotQA" - id: test - path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.7259 - R@100: - - 0.8727 - R@1000: - - 0.9424 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 682caa635..7ead059df 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet -index_path: indexes/parquet/hotpotqa +index_path: indexes/lucene-hnsw.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): HotpotQA" - id: test - path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz - qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7259 - R@100: - - 0.8727 - R@1000: - - 0.9424 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 
-efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 6c40a1d46..08611bb30 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet -index_path: indexes/parquet/nfcorpus +index_path: indexes/lucene-hnsw.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- 
name: "BEIR (v1.0.0): NFCorpus" - id: test - path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.3735 - R@100: - - 0.3368 - R@1000: - - 0.6622 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 3b8ba633c..f9e5a5309 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet -index_path: indexes/parquet/nfcorpus +index_path: indexes/lucene-hnsw.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - 
parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): NFCorpus" - id: test - path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz - qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.3735 - R@100: - - 0.3368 - R@1000: - - 0.6622 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 4733708f4..09b823857 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: 
beir-v1.0.0-nq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet -index_path: indexes/parquet/nq +index_path: indexes/lucene-hnsw.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): NQ" - id: test - path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-nq.test.txt + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.5413 - R@100: - - 0.9415 - R@1000: - - 0.9859 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + 
results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.008 + R@1000: + - 0.009 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 2d6b75f9f..32a174134 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-nq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet -index_path: indexes/parquet/nq +index_path: indexes/lucene-hnsw.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): NQ" - id: test - path: 
topics.beir-v1.0.0-nq.test.tsv.gz - qrel: qrels.beir-v1.0.0-nq.test.txt + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.5413 - R@100: - - 0.9415 - R@1000: - - 0.9859 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.007 + R@1000: + - 0.009 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 6b5a3a716..b83b85c5e 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet -index_path: indexes/parquet/quora +index_path: indexes/lucene-hnsw.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: 
"\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Quora" - id: test - path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-quora.test.txt + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.8890 - R@100: - - 0.9967 - R@1000: - - 0.9998 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 6d8eb9b5d..2c6dcf6c2 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet -index_path: indexes/parquet/quora +index_path: 
indexes/lucene-hnsw.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Quora" - id: test - path: topics.beir-v1.0.0-quora.test.tsv.gz - qrel: qrels.beir-v1.0.0-quora.test.txt + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: qrels.beir-v1.0.0-quora.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.8890 - R@100: - - 0.9967 - R@1000: - - 0.9998 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + 
nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 975db8d44..58d9a9163 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet -index_path: indexes/parquet/robust04 +index_path: indexes/lucene-hnsw.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Robust04" - id: test - path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz - qrel: 
qrels.beir-v1.0.0-robust04.test.txt + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4465 - R@100: - - 0.3507 - R@1000: - - 0.5981 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 0b7a0507d..40ff49214 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet -index_path: indexes/parquet/robust04 +index_path: indexes/lucene-hnsw.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m 
recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Robust04" - id: test - path: topics.beir-v1.0.0-robust04.test.tsv.gz - qrel: qrels.beir-v1.0.0-robust04.test.txt + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.4465 - R@100: - - 0.3507 - R@1000: - - 0.5981 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.006 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 57acac5d9..13431bf04 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet -index_path: 
indexes/parquet/scidocs +index_path: indexes/lucene-hnsw.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): SCIDOCS" - id: test - path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-scidocs.test.txt + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.2170 - R@100: - - 0.4959 - R@1000: - - 0.7824 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + 
tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 31f7c541c..613b2c2e2 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet -index_path: indexes/parquet/scidocs +index_path: indexes/lucene-hnsw.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): SCIDOCS" - id: test - path: topics.beir-v1.0.0-scidocs.test.tsv.gz - qrel: 
qrels.beir-v1.0.0-scidocs.test.txt + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2170 - R@100: - - 0.4959 - R@1000: - - 0.7824 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 448a498e5..df437bd85 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet -index_path: indexes/parquet/scifact +index_path: indexes/lucene-hnsw.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - 
parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): SciFact" - id: test - path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-scifact.test.txt + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -removeQuery - -threads 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.7408 - R@100: - - 0.9667 - R@1000: - - 0.9967 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index b1fd40d0f..d3d6331e1 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet -index_path: 
indexes/parquet/scifact +index_path: indexes/lucene-hnsw.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): SciFact" - id: test - path: topics.beir-v1.0.0-scifact.test.tsv.gz - qrel: qrels.beir-v1.0.0-scifact.test.txt + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -removeQuery - -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7408 - R@100: - - 0.9667 - R@1000: - - 0.9967 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 
0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml index ff0686a07..e4a4c3829 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet -index_path: indexes/parquet/signal1m +index_path: indexes/lucene-hnsw.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Signal-1M" - id: test - path: 
topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-signal1m.test.txt + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.2886 - R@100: - - 0.3112 - R@1000: - - 0.5331 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.03 + R@1000: + - 0.05 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index dd7e401a8..0c91b16a8 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet -index_path: indexes/parquet/signal1m +index_path: indexes/lucene-hnsw.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: 
false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Signal-1M" - id: test - path: topics.beir-v1.0.0-signal1m.test.tsv.gz - qrel: qrels.beir-v1.0.0-signal1m.test.txt + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2886 - R@100: - - 0.3112 - R@1000: - - 0.5331 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.03 + R@1000: + - 0.045 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml index c72f704fd..0f4e8569c 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 
corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet -index_path: indexes/parquet/trec-covid +index_path: indexes/lucene-hnsw.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): TREC-COVID" - id: test - path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-trec-covid.test.txt + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.7814 - R@100: - - 0.1406 - R@1000: - - 0.4768 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 
-efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index d2760726e..90e9b6698 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet -index_path: indexes/parquet/trec-covid +index_path: indexes/lucene-hnsw.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + 
can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): TREC-COVID" - id: test - path: topics.beir-v1.0.0-trec-covid.test.tsv.gz - qrel: qrels.beir-v1.0.0-trec-covid.test.txt + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.7814 - R@100: - - 0.1406 - R@1000: - - 0.4768 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 3e6634e24..78b2d2e26 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet -index_path: indexes/parquet/trec-news +index_path: indexes/lucene-hnsw.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: 
-c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): TREC-NEWS" - id: test - path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-trec-news.test.txt + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.4425 - R@100: - - 0.4992 - R@1000: - - 0.7875 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.01 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 94f7848ea..a5cb32ba2 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet -index_path: indexes/parquet/trec-news +index_path: indexes/lucene-hnsw.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): TREC-NEWS" - id: test - path: topics.beir-v1.0.0-trec-news.test.tsv.gz - qrel: qrels.beir-v1.0.0-trec-news.test.txt + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 
- results: - nDCG@10: - - 0.4425 - R@100: - - 0.4992 - R@1000: - - 0.7875 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.009 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml index 1db44bfd1..37fbe6f18 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.cached.yaml @@ -1,7 +1,8 @@ +--- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet -index_path: indexes/parquet/webis-touche2020 +index_path: indexes/lucene-hnsw.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator @@ -9,45 +10,51 @@ index_threads: 16 index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: JsonStringVector topics: -- name: "BEIR (v1.0.0): Webis-Touche2020" - id: test - path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz - qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt models: -- name: bge-hnsw-cached - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField vector -removeQuery -threads - 16 -hits 1000 -efSearch 1000 - results: - nDCG@10: - - 0.2570 - R@100: - - 0.4857 - R@1000: - - 0.8298 + - name: bge-hnsw-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml index 49f5fc444..8403e4ee9 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw.onnx.yaml @@ -1,53 +1,60 @@ +--- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet -index_path: indexes/parquet/webis-touche2020 +index_path: indexes/lucene-hnsw.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: hnsw collection_class: ParquetDenseVectorCollection generator_class: 
ParquetDenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: -- metric: nDCG@10 - command: bin/trec_eval - params: -c -m ndcg_cut.10 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@100 - command: bin/trec_eval - params: -c -m recall.100 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false -- metric: R@1000 - command: bin/trec_eval - params: -c -m recall.1000 - separator: "\t" - parse_index: 2 - metric_precision: 4 - can_combine: false + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false topic_reader: TsvString topics: -- name: "BEIR (v1.0.0): Webis-Touche2020" - id: test - path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz - qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt models: -- name: bge-hnsw-onnx - display: BGE-base-en-v1.5 - type: hnsw - params: -generator VectorQueryGenerator -topicField title -removeQuery -threads - 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 - results: - nDCG@10: - - 0.2570 - R@100: - - 0.4857 - R@1000: - - 0.8298 + - name: bge-hnsw-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.001 From 
741a8f59b59e4b7965aeb0c83c8e0f63ea27ffce Mon Sep 17 00:00:00 2001 From: lintool Date: Mon, 23 Sep 2024 19:40:53 -0400 Subject: [PATCH 3/4] Added flat-int8 --- ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 
+++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 
+++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 60 +++++++++++++++++++ 58 files changed, 3480 insertions(+) create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..3370320a9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/arguana + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..b7ad87d36 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - 
metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.03 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..85db65c0e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: 
bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.004 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..121c01a94 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: + - name: 
bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..89f57fdb4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.004 + 
R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..282ff2b6d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.004 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 
100644 index 000000000..33806aaff --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..f8c6107da --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..b3e240f74 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english + +index_path: 
indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..cd3372e77 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator 
+index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..5b64f12ab --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - 
metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..11fdfd03e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..8a0b58e18 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.001 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..e7453dce8 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + 
results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.001 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..80384e0a6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..63eee3e0a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..43636dccb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..ee94917d9 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..52f6d6648 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 
+corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..89ac29f2c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ 
+index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..5121cbfa7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + 
command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..8b7ab58d3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..cd267888f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: 
JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..cbc4276ef --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: + - name: bge-flat-int8-onnx + display: 
BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..bc26120e9 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + 
R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..71884fab1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..2b25ddb46 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..355607095 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..e9c6703d1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..0c1ccfbc0 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress + +index_path: 
indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..ee53dace4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: 
-quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.004 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..f83cd3579 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: 
"\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.004 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..9649dfca3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): FEVER" + id: test + path: 
topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..a308d3c15 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.001 + R@100: 
+ - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..541980814 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..098584ddb --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..4def549cb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ +index_type: flat 
+collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..d17944f98 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: 
false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..b003784fb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + 
+topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.006 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..9eca7ec0e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery 
-threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..2c2f80dac --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml 
new file mode 100644 index 000000000..b08a674b6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..77b833d41 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora + +index_path: 
indexes/lucene-flat-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..0f390e558 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..0eaaa93b6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04 + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.004 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..857396eb5 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04 + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: 
-encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.005 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..1feee96f6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..d5fc9f812 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..8e3e9e170 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..8630d024a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 
+index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..b0d401b8c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.007 + R@100: + - 0.004 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..4f7d508f4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: 
topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.004 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..bccba0b42 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 
+ R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..625c8e504 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..e74c99171 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.007 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..7837aa6c2 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.01 + R@100: + - 0.005 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml new file mode 100644 index 000000000..fa444460e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020 + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection 
+generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: + - name: bge-flat-int8-cached + display: BGE-base-en-v1.5 + type: flat + params: -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.002 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml new file mode 100644 index 000000000..d73981ce7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020 + +index_path: indexes/lucene-flat-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ +index_type: flat +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: + - name: bge-flat-int8-onnx + display: BGE-base-en-v1.5 + type: flat + params: -encoder BgeBaseEn15 -hits 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.001 + R@1000: + - 0.005 From ba612442d2022296254f9322a05b9924c5064763 Mon Sep 17 00:00:00 2001 From: lintool Date: Mon, 23 Sep 2024 19:55:54 -0400 Subject: [PATCH 4/4] Add flat/hnsw int8 --- ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ 
...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ 
...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ 
...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ ...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ 
...base-en-v1.5.parquet.flat-int8.cached.yaml | 2 +- ...e-base-en-v1.5.parquet.flat-int8.onnx.yaml | 2 +- ...base-en-v1.5.parquet.hnsw-int8.cached.yaml | 60 +++++++++++++++++++ ...e-base-en-v1.5.parquet.hnsw-int8.onnx.yaml | 60 +++++++++++++++++++ 116 files changed, 3538 insertions(+), 58 deletions(-) create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 
src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml create mode 100644 src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 3370320a9..5a27a1e8c 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: 
beir-v1.0.0-arguana.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index b7ad87d36..017f8e93c 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..a7031f2b7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m 
recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..ebc988ee5 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-arguana.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - 
name: "BEIR (v1.0.0): ArguAna" + id: test + path: topics.beir-v1.0.0-arguana.test.tsv.gz + qrel: qrels.beir-v1.0.0-arguana.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.6361 + R@100: + - 0.9915 + R@1000: + - 0.9964 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.025 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 85db65c0e..46c311579 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 121c01a94..703d14e52 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: flat diff --git 
a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..e721ae266 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 500 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 2000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.03 + R@100: + - 0.035 + R@1000: + - 0.06 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..d6aff6cae --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-bioasq.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 500 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): BioASQ" + id: test + path: topics.beir-v1.0.0-bioasq.test.tsv.gz + qrel: qrels.beir-v1.0.0-bioasq.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 2000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4149 + R@100: + - 0.6317 + R@1000: + - 0.8059 + tolerance: + nDCG@10: + - 0.03 + R@100: + - 0.035 + R@1000: + - 0.06 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 89f57fdb4..6470573d3 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 -corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 282ff2b6d..d8f880a86 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..8f5d81342 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: 
R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..0b0c4129d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" 
+ parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Climate-FEVER" + id: test + path: topics.beir-v1.0.0-climate-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-climate-fever.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3119 + R@100: + - 0.6362 + R@1000: + - 0.8307 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.002 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 33806aaff..ebd095739 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index f8c6107da..6b09565e1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 -corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..8192a646d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + 
- 0.9611 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.002 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..e473db6ca --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-android" + id: test + path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5075 + R@100: + - 0.8454 + R@1000: + - 0.9611 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index b3e240f74..f7cb819b0 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index cd3372e77..838783d80 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..6e5489961 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..c87f3ce12 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-english" + id: test + path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4857 + R@100: + - 0.7587 + R@1000: + - 0.8839 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 5b64f12ab..a674311da 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 -corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 11fdfd03e..691df77dd 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..c052074af --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..7a1b3c631 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 
+ metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-gaming" + id: test + path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5965 + R@100: + - 0.9036 + R@1000: + - 0.9719 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 8a0b58e18..309c0e109 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index e7453dce8..ccbd7c660 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 
@@ --- corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..542703dbd --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + 
R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.002 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..e91cc3844 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-gis" + id: test + path: topics.beir-v1.0.0-cqadupstack-gis.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-gis.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4127 + R@100: + - 0.7682 + R@1000: + - 0.9117 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.002 + R@1000: + - 0.004 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 80384e0a6..80554de8a 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 63eee3e0a..69b86855e 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 
000000000..7f560f248 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..ad4e89afb --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-mathematica" + id: test + path: topics.beir-v1.0.0-cqadupstack-mathematica.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-mathematica.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3163 + R@100: + - 0.6922 + R@1000: + - 0.8810 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 43636dccb..e63e4f3f1 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index ee94917d9..26c90272a 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..9ed6155a3 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ +index_type: hnsw +collection_class: 
ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..d16fe48f4 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: 
nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-physics" + id: test + path: topics.beir-v1.0.0-cqadupstack-physics.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-physics.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4722 + R@100: + - 0.8081 + R@1000: + - 0.9406 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 52f6d6648..1012cb7f6 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 89ac29f2c..787af4851 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..f293f0efc --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..741fa05e2 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR 
(v1.0.0): CQADupStack-programmers" + id: test + path: topics.beir-v1.0.0-cqadupstack-programmers.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-programmers.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4242 + R@100: + - 0.7856 + R@1000: + - 0.9348 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 5121cbfa7..ffbc2b1da 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 8b7ab58d3..49f610c08 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..af8e6fcfc --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.008 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..8f51df3ee --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-stats" + id: test + path: topics.beir-v1.0.0-cqadupstack-stats.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-stats.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3732 + R@100: + - 0.6727 + R@1000: + - 0.8445 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.002 + R@1000: + - 0.01 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index cd267888f..256bc34f9 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index cbc4276ef..04e8f1760 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..ddbb26396 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..8247625d1 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ +index_type: hnsw +collection_class: 
ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-tex" + id: test + path: topics.beir-v1.0.0-cqadupstack-tex.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-tex.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3115 + R@100: + - 0.6486 + R@1000: + - 0.8537 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index bc26120e9..5fde63621 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: flat diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 71884fab1..593b9685b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..d13d14623 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m 
recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: topics.beir-v1.0.0-cqadupstack-unix.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..7a9037ca6 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-unix" + id: test + path: 
topics.beir-v1.0.0-cqadupstack-unix.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-unix.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4219 + R@100: + - 0.7797 + R@1000: + - 0.9237 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.004 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 2b25ddb46..ccbbcfe95 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 355607095..33c38d482 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..b5cddeb8e --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 
0.005 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..b59049f3b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-webmasters" + id: test + path: topics.beir-v1.0.0-cqadupstack-webmasters.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-webmasters.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7774 + R@1000: + - 0.9380 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.004 + R@1000: + - 0.002 diff --git 
a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index e9c6703d1..48b442277 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 0c1ccfbc0..daf06c798 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..1adae834c --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..0ad2bc945 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: 
beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): CQADupStack-wordpress" + id: test + path: topics.beir-v1.0.0-cqadupstack-wordpress.test.tsv.gz + qrel: qrels.beir-v1.0.0-cqadupstack-wordpress.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3547 + R@100: + - 0.7065 + R@1000: + - 0.8861 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.004 + R@1000: + - 0.004 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index ee53dace4..55adf65ef 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 -corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index f83cd3579..4d69a88cb 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..252b08223 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: 
false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.003 + R@100: + - 0.01 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..4e7ee1270 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 
+ separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): DBPedia" + id: test + path: topics.beir-v1.0.0-dbpedia-entity.test.tsv.gz + qrel: qrels.beir-v1.0.0-dbpedia-entity.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4074 + R@100: + - 0.5303 + R@1000: + - 0.7833 + tolerance: + nDCG@10: + - 0.004 + R@100: + - 0.01 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 9649dfca3..90a0f312f 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index a308d3c15..8277eb882 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-fever.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ 
index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..4c3cbd431 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..90c0a3ef0 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fever.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-fever.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): FEVER" + id: test + path: topics.beir-v1.0.0-fever.test.tsv.gz + qrel: qrels.beir-v1.0.0-fever.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8630 + R@100: + - 0.9719 + R@1000: + - 0.9855 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.02 + R@1000: + - 0.02 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 541980814..a0f445943 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 098584ddb..0de03b2a1 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..d6302699d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.005 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..d7f94677b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-fiqa.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): FiQA-2018" + id: test + path: topics.beir-v1.0.0-fiqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-fiqa.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: 
BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4065 + R@100: + - 0.7415 + R@1000: + - 0.9083 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.005 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 4def549cb..42b00396e 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index d17944f98..ef50174d2 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 
000000000..6afdb4d63 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.03 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..df0df232f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): HotpotQA" + id: test + path: topics.beir-v1.0.0-hotpotqa.test.tsv.gz + qrel: qrels.beir-v1.0.0-hotpotqa.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7259 + R@100: + - 0.8727 + R@1000: + - 0.9424 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.025 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index b003784fb..4596289f4 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ 
index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 9eca7ec0e..4f6c0a0ce 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..bea3cbbf7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false 
+ +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..e94f50778 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): NFCorpus" + id: test + path: topics.beir-v1.0.0-nfcorpus.test.tsv.gz + qrel: qrels.beir-v1.0.0-nfcorpus.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: 
-encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.3735 + R@100: + - 0.3368 + R@1000: + - 0.6622 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.002 + R@1000: + - 0.006 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 2c2f80dac..0a065220f 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-nq.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index b08a674b6..81bb3532d 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-nq.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..48ef4d2fd --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- 
+corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.009 + R@1000: + - 0.009 diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..31c5530f7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-nq.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-nq.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 
+index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): NQ" + id: test + path: topics.beir-v1.0.0-nq.test.tsv.gz + qrel: qrels.beir-v1.0.0-nq.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.5413 + R@100: + - 0.9415 + R@1000: + - 0.9859 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.009 + R@1000: + - 0.009 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 77b833d41..32017fc90 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 0f390e558..0097ebfe7 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-quora.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..4f8e6325c --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 
-efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..21c60cfbe --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-quora.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-quora.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Quora" + id: test + path: topics.beir-v1.0.0-quora.test.tsv.gz + qrel: qrels.beir-v1.0.0-quora.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.8890 + R@100: + - 0.9967 + R@1000: + - 0.9998 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.001 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 0eaaa93b6..270042750 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 857396eb5..84ab496f3 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..a740a1c6a --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ +index_type: hnsw 
+collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.005 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..c6927b46f --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-robust04.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" 
+ parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Robust04" + id: test + path: topics.beir-v1.0.0-robust04.test.tsv.gz + qrel: qrels.beir-v1.0.0-robust04.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4465 + R@100: + - 0.3507 + R@1000: + - 0.5981 + tolerance: + nDCG@10: + - 0.005 + R@100: + - 0.006 + R@1000: + - 0.007 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 1feee96f6..2ba65da0b 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index d5fc9f812..33e027cfd 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..8cc71f815 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 
+ R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..3096b7200 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-scidocs.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): SCIDOCS" + id: test + path: topics.beir-v1.0.0-scidocs.test.tsv.gz + qrel: qrels.beir-v1.0.0-scidocs.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2170 + R@100: + - 0.4959 + R@1000: + - 0.7824 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.003 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index 8e3e9e170..8ce1960ae 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 8630d024a..325b4d266 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..aa78e69da --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ +index_type: hnsw +collection_class: 
ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.001 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..56ae97ecb --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-scifact.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + 
metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): SciFact" + id: test + path: topics.beir-v1.0.0-scifact.test.tsv.gz + qrel: qrels.beir-v1.0.0-scifact.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7408 + R@100: + - 0.9667 + R@1000: + - 0.9967 + tolerance: + nDCG@10: + - 0.002 + R@100: + - 0.003 + R@1000: + - 0.001 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index b0d401b8c..3b70d8016 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 4f7d508f4..7aea0fd70 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ 
b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..c8444faa7 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + 
nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.025 + R@100: + - 0.03 + R@1000: + - 0.05 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..71f653f9b --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-signal1m.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Signal-1M" + id: test + path: topics.beir-v1.0.0-signal1m.test.tsv.gz + qrel: qrels.beir-v1.0.0-signal1m.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2886 + R@100: + - 0.3112 + R@1000: + - 0.5331 + tolerance: + nDCG@10: + - 0.02 + R@100: + - 0.025 + R@1000: + - 0.05 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml 
b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index bccba0b42..27f22df56 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 625c8e504..78d59e32e 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..293467eee --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: 
indexes/lucene-hnsw-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.002 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..5fcb80458 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + 
+metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): TREC-COVID" + id: test + path: topics.beir-v1.0.0-trec-covid.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-covid.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.7814 + R@100: + - 0.1406 + R@1000: + - 0.4768 + tolerance: + nDCG@10: + - 0.006 + R@100: + - 0.001 + R@1000: + - 0.002 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index e74c99171..23c6de55e 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index 7837aa6c2..98196584c 100644 --- 
a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..fb6ea3a4d --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: + - name: bge-hnsw-int8-cached + 
display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.015 + R@1000: + - 0.03 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..1587a1f65 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-trec-news.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): TREC-NEWS" + id: test + path: topics.beir-v1.0.0-trec-news.test.tsv.gz + qrel: qrels.beir-v1.0.0-trec-news.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.4425 + R@100: + - 0.4992 + R@1000: + - 0.7875 + tolerance: + nDCG@10: + - 0.015 + R@100: + - 0.015 + R@1000: + - 0.03 diff --git 
a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml index fa444460e..77cc557d6 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.cached.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml index d73981ce7..ba5c59931 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat-int8.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 -corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet index_path: indexes/lucene-flat-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: flat diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml new file mode 100644 index 000000000..6598b83b5 --- /dev/null +++ 
b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.cached.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: JsonStringVector +topics: + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.bge-base-en-v1.5.jsonl.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: + - name: bge-hnsw-int8-cached + display: BGE-base-en-v1.5 + type: hnsw + params: -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.002 + R@1000: + - 0.005 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml new file mode 100644 index 000000000..d62262352 --- /dev/null +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.hnsw-int8.onnx.yaml @@ -0,0 +1,60 @@ +--- +corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 +corpus_path: 
collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet + +index_path: indexes/lucene-hnsw-int8.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ +index_type: hnsw +collection_class: ParquetDenseVectorCollection +generator_class: ParquetDenseVectorDocumentGenerator +index_threads: 16 +index_options: -M 16 -efC 100 -quantize.int8 + +metrics: + - metric: nDCG@10 + command: bin/trec_eval + params: -c -m ndcg_cut.10 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "BEIR (v1.0.0): Webis-Touche2020" + id: test + path: topics.beir-v1.0.0-webis-touche2020.test.tsv.gz + qrel: qrels.beir-v1.0.0-webis-touche2020.test.txt + +models: + - name: bge-hnsw-int8-onnx + display: BGE-base-en-v1.5 + type: hnsw + params: -encoder BgeBaseEn15 -hits 1000 -efSearch 1000 -removeQuery -threads 16 + results: + nDCG@10: + - 0.2570 + R@100: + - 0.4857 + R@1000: + - 0.8298 + tolerance: + nDCG@10: + - 0.008 + R@100: + - 0.003 + R@1000: + - 0.006