Skip to content

Commit

Permalink
CrateDB vector: Fix initialization of vector dimensionality
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Dec 7, 2023
1 parent 0d96827 commit 4e00dcb
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
5 changes: 3 additions & 2 deletions libs/langchain/langchain/vectorstores/cratedb/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def create_tables_if_not_exists(self) -> None:
"""
Need to overwrite because this `Base` is different from parent's `Base`.
"""
mf = ModelFactory()
mf.Base.metadata.create_all(self._engine)
if self.BaseModel is None:
raise RuntimeError("Storage models not initialized")
self.BaseModel.metadata.create_all(self._engine)

def drop_tables(self) -> None:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,18 @@
FakeEmbeddings,
)

SCHEMA_NAME = os.environ.get("TEST_CRATEDB_DATABASE", "testdrive")

CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(
driver=os.environ.get("TEST_CRATEDB_DRIVER", "crate"),
host=os.environ.get("TEST_CRATEDB_HOST", "localhost"),
port=int(os.environ.get("TEST_CRATEDB_PORT", "4200")),
database=os.environ.get("TEST_CRATEDB_DATABASE", "testdrive"),
database=SCHEMA_NAME,
user=os.environ.get("TEST_CRATEDB_USER", "crate"),
password=os.environ.get("TEST_CRATEDB_PASSWORD", ""),
)


# TODO: Try 1536 after https://github.com/crate/crate/pull/14699.
# ADA_TOKEN_COUNT = 14
ADA_TOKEN_COUNT = 1024
# ADA_TOKEN_COUNT = 1536
ADA_TOKEN_COUNT = 1536


@pytest.fixture
Expand Down Expand Up @@ -167,6 +165,25 @@ def test_cratedb_texts() -> None:
assert output == [Document(page_content="foo")]


def test_cratedb_embedding_dimension() -> None:
    """Check that the `embedding` column is declared with the expected vector size.

    Builds a store from three sample texts (dropping any pre-existing
    collection first), asks the database for the DDL of the `embedding`
    table, and asserts that the `embedding` column is declared as
    `FLOAT_VECTOR(ADA_TOKEN_COUNT)`.

    Raises:
        ValueError: If `SHOW CREATE TABLE` yields no row.
    """
    store = CrateDBVectorSearch.from_texts(
        texts=["foo", "bar", "baz"],
        collection_name="test_collection",
        embedding=ConsistentFakeEmbeddingsWithAdaDimension(),
        connection_string=CONNECTION_STRING,
        pre_delete_collection=True,
    )

    # Prepare the statement and the expected DDL fragment up front, so the
    # session block only contains the database round trip and the checks.
    statement = sa.text(f"SHOW CREATE TABLE {SCHEMA_NAME}.embedding")
    expected_column = f'"embedding" FLOAT_VECTOR({ADA_TOKEN_COUNT})'

    with store.Session() as session:
        row = session.execute(statement).first()
        if not row:
            raise ValueError("No data found")
        # The first column of the single result row carries the DDL text.
        assert expected_column in row[0]


def test_cratedb_embeddings() -> None:
"""Test end to end construction with embeddings and search."""
texts = ["foo", "bar", "baz"]
Expand Down

0 comments on commit 4e00dcb

Please sign in to comment.