From ee0d1f583e8f66a98239681b8687a87e3ef93183 Mon Sep 17 00:00:00 2001
From: Andreas Motl <andreas.motl@crate.io>
Date: Wed, 22 Nov 2023 16:05:23 +0100
Subject: [PATCH] CrateDB vector: Test non-deterministic values by using
 pytest.approx

The test cases can be written substantially more elegant.
---
 .../vectorstores/test_cratedb.py              | 71 ++++---------------
 1 file changed, 14 insertions(+), 57 deletions(-)

diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py b/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py
index 44acb6652123e..bcfc9eebef6d0 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py
@@ -6,7 +6,7 @@
 """
 import os
 import re
-from typing import Dict, Generator, List, Tuple
+from typing import Dict, Generator, List
 
 import pytest
 import sqlalchemy as sa
@@ -85,20 +85,6 @@ def prune_tables(engine: sa.Engine) -> None:
                 pass
 
 
-def decode_output(
-    output: List[Tuple[Document, float]]
-) -> Tuple[List[Document], List[float]]:
-    """
-    Decode a typical API result into separate `documents` and `scores`.
-    It is needed as utility function in some test cases to compensate
-    for different and/or flaky score values, when compared to the
-    original implementation.
-    """
-    documents = [item[0] for item in output]
-    scores = [round(item[1], 1) for item in output]
-    return documents, scores
-
-
 def ensure_collection(session: sa.orm.Session, name: str) -> None:
     """
     Create a (fake) collection item.
@@ -241,12 +227,11 @@ def test_cratedb_with_filter_match() -> None:
         connection_string=CONNECTION_STRING,
         pre_delete_collection=True,
     )
-    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"})
     # TODO: Original:
     #       assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]  # noqa: E501
-    assert output in [
-        [(Document(page_content="foo", metadata={"page": "0"}), 2.1307645)],
-        [(Document(page_content="foo", metadata={"page": "0"}), 2.3150668)],
+    output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"})
+    assert output == [
+        (Document(page_content="foo", metadata={"page": "0"}), pytest.approx(2.2, 0.1))
     ]
 
 
@@ -263,20 +248,9 @@ def test_cratedb_with_filter_distant_match() -> None:
         pre_delete_collection=True,
     )
     output = docsearch.similarity_search_with_score("foo", k=2, filter={"page": "2"})
-    # TODO: Original:
-    #       output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})  # noqa: E501
-    #       assert output == [
-    #         (Document(page_content="baz", metadata={"page": "2"}), 0.0013003906671379406)  # noqa: E501
-    #       ]
-    documents, scores = decode_output(output)
-    assert documents == [
-        Document(page_content="baz", metadata={"page": "2"}),
-    ]
-    assert scores in [
-        [1.3],
-        [1.5],
-        [1.6],
-        [1.7],
+    # Original score value: 0.0013003906671379406
+    assert output == [
+        (Document(page_content="baz", metadata={"page": "2"}), pytest.approx(1.5, 0.2))
     ]
 
 
@@ -429,19 +403,11 @@ def test_cratedb_with_filter_in_set() -> None:
     output = docsearch.similarity_search_with_score(
         "foo", k=2, filter={"page": {"IN": ["0", "2"]}}
     )
-    # TODO: Original:
-    """
+    # Original score values: 0.0, 0.0013003906671379406
     assert output == [
-        (Document(page_content="foo", metadata={"page": "0"}), 0.0),
-        (Document(page_content="baz", metadata={"page": "2"}), 0.0013003906671379406),
-    ]
-    """
-    documents, scores = decode_output(output)
-    assert documents == [
-        Document(page_content="foo", metadata={"page": "0"}),
-        Document(page_content="baz", metadata={"page": "2"}),
+        (Document(page_content="foo", metadata={"page": "0"}), pytest.approx(3.0, 0.1)),
+        (Document(page_content="baz", metadata={"page": "2"}), pytest.approx(2.2, 0.1)),
     ]
-    assert scores == [3.0, 2.2]
 
 
 def test_cratedb_delete_docs() -> None:
@@ -486,21 +452,12 @@ def test_cratedb_relevance_score() -> None:
     )
 
     output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
-    """
-    # TODO: Original code, where the `distance` is stable.
+    # Original score values: 1.0, 0.9996744261675065, 0.9986996093328621
     assert output == [
-        (Document(page_content="foo", metadata={"page": "0"}), 1.0),
-        (Document(page_content="bar", metadata={"page": "1"}), 0.9996744261675065),
-        (Document(page_content="baz", metadata={"page": "2"}), 0.9986996093328621),
-    ]
-    """
-    documents, scores = decode_output(output)
-    assert documents == [
-        Document(page_content="foo", metadata={"page": "0"}),
-        Document(page_content="bar", metadata={"page": "1"}),
-        Document(page_content="baz", metadata={"page": "2"}),
+        (Document(page_content="foo", metadata={"page": "0"}), pytest.approx(1.4, 0.1)),
+        (Document(page_content="bar", metadata={"page": "1"}), pytest.approx(1.1, 0.1)),
+        (Document(page_content="baz", metadata={"page": "2"}), pytest.approx(0.8, 0.1)),
     ]
-    assert scores == [1.4, 1.1, 0.8]
 
 
 def test_cratedb_retriever_search_threshold() -> None: