From 4e00dcbe91ec11cdd70f30dff9871b0d33bfd5d4 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Tue, 28 Nov 2023 00:21:20 +0100 Subject: [PATCH] CrateDB vector: Fix initialization of vector dimensionality --- .../langchain/vectorstores/cratedb/base.py | 5 ++-- .../vectorstores/test_cratedb.py | 29 +++++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/cratedb/base.py b/libs/langchain/langchain/vectorstores/cratedb/base.py index 552cc6c8dee53..f2f0f29c47757 100644 --- a/libs/langchain/langchain/vectorstores/cratedb/base.py +++ b/libs/langchain/langchain/vectorstores/cratedb/base.py @@ -192,8 +192,9 @@ def create_tables_if_not_exists(self) -> None: """ Need to overwrite because this `Base` is different from parent's `Base`. """ - mf = ModelFactory() - mf.Base.metadata.create_all(self._engine) + if self.BaseModel is None: + raise RuntimeError("Storage models not initialized") + self.BaseModel.metadata.create_all(self._engine) def drop_tables(self) -> None: """ diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py b/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py index bcfc9eebef6d0..0b1e44ab31aa1 100644 --- a/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py +++ b/libs/langchain/tests/integration_tests/vectorstores/test_cratedb.py @@ -23,20 +23,18 @@ FakeEmbeddings, ) +SCHEMA_NAME = os.environ.get("TEST_CRATEDB_DATABASE", "testdrive") + CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params( driver=os.environ.get("TEST_CRATEDB_DRIVER", "crate"), host=os.environ.get("TEST_CRATEDB_HOST", "localhost"), port=int(os.environ.get("TEST_CRATEDB_PORT", "4200")), - database=os.environ.get("TEST_CRATEDB_DATABASE", "testdrive"), + database=SCHEMA_NAME, user=os.environ.get("TEST_CRATEDB_USER", "crate"), password=os.environ.get("TEST_CRATEDB_PASSWORD", ""), ) - -# TODO: Try 1536 after https://github.com/crate/crate/pull/14699. -# ADA_TOKEN_COUNT = 14 -ADA_TOKEN_COUNT = 1024 -# ADA_TOKEN_COUNT = 1536 +ADA_TOKEN_COUNT = 1536 @pytest.fixture @@ -167,6 +165,25 @@ def test_cratedb_texts() -> None: assert output == [Document(page_content="foo")] +def test_cratedb_embedding_dimension() -> None: + """Verify the `embedding` column uses the correct vector dimensionality.""" + texts = ["foo", "bar", "baz"] + docsearch = CrateDBVectorSearch.from_texts( + texts=texts, + collection_name="test_collection", + embedding=ConsistentFakeEmbeddingsWithAdaDimension(), + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + with docsearch.Session() as session: + result = session.execute(sa.text(f"SHOW CREATE TABLE {SCHEMA_NAME}.embedding")) + record = result.first() + if not record: + raise ValueError("No data found") + ddl = record[0] + assert f'"embedding" FLOAT_VECTOR({ADA_TOKEN_COUNT})' in ddl + + def test_cratedb_embeddings() -> None: """Test end to end construction with embeddings and search.""" texts = ["foo", "bar", "baz"]