From 2f7d969e9b41eba97121dd295746a58126f621b9 Mon Sep 17 00:00:00 2001 From: "U. Artie Eoff" Date: Wed, 29 Jan 2025 13:39:54 -0500 Subject: [PATCH] test_sentence_transformers: use baseline fixture Use the new baseline fixture to validate test results. Signed-off-by: U. Artie Eoff --- .../tests/test_sentence_transformers.json | 106 ++++++++++++++++++ tests/test_sentence_transformers.py | 64 +++++------ 2 files changed, 132 insertions(+), 38 deletions(-) create mode 100644 tests/baselines/fixture/tests/test_sentence_transformers.json diff --git a/tests/baselines/fixture/tests/test_sentence_transformers.json b/tests/baselines/fixture/tests/test_sentence_transformers.json new file mode 100644 index 0000000000..23f4f6af97 --- /dev/null +++ b/tests/baselines/fixture/tests/test_sentence_transformers.json @@ -0,0 +1,106 @@ +{ + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-MiniLM-L12-v2]": { + "gaudi1": { + "measured_throughput": 1252.6261862281467 + }, + "gaudi2": { + "measured_throughput": 3614.2610109716247 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-MiniLM-L6-v2]": { + "gaudi1": { + "measured_throughput": 1109.160132821451 + }, + "gaudi2": { + "measured_throughput": 2615.6975354038477 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-distilroberta-v1]": { + "gaudi1": { + "measured_throughput": 226.90237421623164 + }, + "gaudi2": { + "measured_throughput": 958.5097903298335 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/all-mpnet-base-v2]": { + "gaudi1": { + "measured_throughput": 164.36556936723508 + }, + "gaudi2": { + "measured_throughput": 762.5595168883357 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/distiluse-base-multilingual-cased-v1]": { + "gaudi1": { + "measured_throughput": 947.844857744754 + }, + "gaudi2": { + "measured_throughput": 3487.3319366004903 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/distiluse-base-multilingual-cased-v2]": { + "gaudi1": { + "measured_throughput": 947.7317550605878 + }, + "gaudi2": { + "measured_throughput": 3807.2486282025716 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-MiniLM-L6-cos-v1]": { + "gaudi1": { + "measured_throughput": 471.14320842607674 + }, + "gaudi2": { + "measured_throughput": 1208.3672807492396 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-distilbert-cos-v1]": { + "gaudi1": { + "measured_throughput": 216.47035182888888 + }, + "gaudi2": { + "measured_throughput": 944.6166139694299 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/multi-qa-mpnet-base-dot-v1]": { + "gaudi1": { + "measured_throughput": 116.82789535569364 + }, + "gaudi2": { + "measured_throughput": 545.3360251829846 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-MiniLM-L3-v2]": { + "gaudi1": { + "measured_throughput": 3029.398417051629 + }, + "gaudi2": { + "measured_throughput": 5734.318427972881 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-albert-small-v2]": { + "gaudi1": { + "measured_throughput": 1139.806075824319 + }, + "gaudi2": { + "measured_throughput": 3896.1911011860166 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2]": { + "gaudi1": { + "measured_throughput": 1253.06776127632 + }, + "gaudi2": { + "measured_throughput": 3558.0778715789693 + } + }, + "tests/test_sentence_transformers.py::test_compute_embeddings_throughput[sentence-transformers/paraphrase-multilingual-mpnet-base-v2]": { + "gaudi1": { + "measured_throughput": 518.4762252952173 + }, + "gaudi2": { + "measured_throughput": 2392.1654748794062 + } + } +} \ No newline at end of file diff --git a/tests/test_sentence_transformers.py b/tests/test_sentence_transformers.py index 90d97f3005..f9b3033a7f 100644 --- a/tests/test_sentence_transformers.py +++ b/tests/test_sentence_transformers.py @@ -9,45 +9,26 @@ from .test_examples import TIME_PERF_FACTOR -if os.environ.get("GAUDI2_CI", "0") == "1": - # Gaudi2 CI baselines - MODELS_TO_TEST = [ - ("sentence-transformers/all-mpnet-base-v2", 762.5595168883357), - ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 545.3360251829846), - ("sentence-transformers/all-distilroberta-v1", 958.5097903298335), - ("sentence-transformers/all-MiniLM-L12-v2", 3614.2610109716247), - ("sentence-transformers/multi-qa-distilbert-cos-v1", 944.6166139694299), - ("sentence-transformers/all-MiniLM-L6-v2", 2615.6975354038477), - ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 1208.3672807492396), - ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 2392.1654748794062), - ("sentence-transformers/paraphrase-albert-small-v2", 3896.1911011860166), - ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 3558.0778715789693), - ("sentence-transformers/paraphrase-MiniLM-L3-v2", 5734.318427972881), - ("sentence-transformers/distiluse-base-multilingual-cased-v1", 3487.3319366004903), - ("sentence-transformers/distiluse-base-multilingual-cased-v2", 3807.2486282025716), - ] -else: - # Gaudi1 CI baselines - MODELS_TO_TEST = [ - ("sentence-transformers/all-mpnet-base-v2", 164.36556936723508), - ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 116.82789535569364), - ("sentence-transformers/all-distilroberta-v1", 226.90237421623164), - ("sentence-transformers/all-MiniLM-L12-v2", 1252.6261862281467), - ("sentence-transformers/multi-qa-distilbert-cos-v1", 216.47035182888888), - ("sentence-transformers/all-MiniLM-L6-v2", 1109.160132821451), - ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 471.14320842607674), - ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 518.4762252952173), - ("sentence-transformers/paraphrase-albert-small-v2", 1139.806075824319), - ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 1253.06776127632), - ("sentence-transformers/paraphrase-MiniLM-L3-v2", 3029.398417051629), - ("sentence-transformers/distiluse-base-multilingual-cased-v1", 947.844857744754), - ("sentence-transformers/distiluse-base-multilingual-cased-v2", 947.7317550605878), - ] +MODELS_TO_TEST = [ + "sentence-transformers/all-mpnet-base-v2", + "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "sentence-transformers/all-distilroberta-v1", + "sentence-transformers/all-MiniLM-L12-v2", + "sentence-transformers/multi-qa-distilbert-cos-v1", + "sentence-transformers/all-MiniLM-L6-v2", + "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", + "sentence-transformers/paraphrase-albert-small-v2", + "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", + "sentence-transformers/paraphrase-MiniLM-L3-v2", + "sentence-transformers/distiluse-base-multilingual-cased-v1", + "sentence-transformers/distiluse-base-multilingual-cased-v2", +] def _test_sentence_transformers( model_name: str, - baseline: float, + baseline, ): model = SentenceTransformer(model_name) @@ -74,10 +55,17 @@ def _test_sentence_transformers( end_time = time.perf_counter() diff_time = end_time - start_time measured_throughput = len(sentences) / diff_time + + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Only assert the last measured throughtput as the first iteration is used as a warmup - assert measured_throughput >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + measured_throughput=measured_throughput, + ) -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST) -def test_compute_embeddings_throughput(model_name: str, baseline: float): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST) +def test_compute_embeddings_throughput(model_name: str, baseline): _test_sentence_transformers(model_name, baseline)