Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify handle auth to allow for authentication db #35

Merged
merged 21 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/qc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ jobs:
uses: supercharge/[email protected]
with:
mongodb-version: ${{ matrix.mongodb-version }}

- name: Test with pytest and generate coverage file
run: poetry run pytest -m "not integration and not neo4j" --ignore=src/linkml_store/inference/implementations/rag_inference_engine.py tests
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ RUN = poetry run
CODE = src/linkml_data_browser


test: pytest doctest
test:
${RUN} pytest -m "not integration and not neo4j" --ignore=src/linkml_store/inference/implementations/rag_inference_engine.py tests
test-core: pytest-core doctest
test-full: pytest-full doctest

Expand Down
680 changes: 15 additions & 665 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ black = { version=">=24.0.0", optional = true }
ruff = { version=">=0.6.2", optional = true }
llm = { version="*", optional = true }
tiktoken = { version="*", optional = true }
pymongo = { version="*", optional = true }
pymongo = "^4.11"
neo4j = { version="*", optional = true }
py2neo = { version="*", optional = true }
networkx = { version="*", optional = true }
Expand Down Expand Up @@ -100,7 +100,7 @@ linkml-store = "linkml_store.cli:cli"
linkml-store-api = "linkml_store.webapi.main:start"

[tool.poetry-dynamic-versioning]
enable = true
enable = false
vcs = "git"
style = "pep440"

Expand Down
53 changes: 52 additions & 1 deletion src/linkml_store/api/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,59 @@ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
"""
raise NotImplementedError

def index(
    self,
    objs: Union[OBJECT, List[OBJECT]],
    index_name: Optional[str] = None,
    replace: bool = False,
    unique: bool = False,
    **kwargs,
) -> None:
    """
    Index objects in the collection.

    Concrete adapter subclasses override this to build a (possibly unique)
    index over the given field(s).

    :param objs: field name(s) to index, or object(s) describing the index
    :param index_name: optional explicit name for the created index
    :param replace: if True, drop and recreate an existing index on the same field
    :param unique: if True, declare the index unique
    :param kwargs: backend-specific options
    :return: None
    :raises NotImplementedError: always; subclasses must implement
    """
    raise NotImplementedError

def upsert(self,
           objs: Union[OBJECT, List[OBJECT]],
           filter_fields: List[str],
           update_fields: Union[List[str], None] = None, **kwargs):
    """
    Insert objects that are not yet in the collection and update those that are.

    Existing objects are matched on the values of ``filter_fields``; when a match
    is found, only ``update_fields`` (or all fields, if None) are overwritten.

    >>> from linkml_store import Client
    >>> client = Client()
    >>> db = client.attach_database("mongodb", alias="test")
    >>> collection = db.create_collection("Person")
    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
    >>> collection.upsert(objs, filter_fields=["id"])

    :param objs: object or list of objects to insert or update
    :param filter_fields: List of field names to use as the filter for matching existing documents.
    :param update_fields: List of field names to include in the update. If None, all fields are updated.
    :param kwargs: backend-specific options

    :return: None
    :raises NotImplementedError: always; concrete adapter subclasses must implement
    """
    raise NotImplementedError

def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
    """
    Pre-query hook.

    This is called before a query is executed. It is used to materialize derivations and indexes.

    :param query: the query about to be executed, if any
    :param kwargs: additional backend-specific options
    :return: None
    """
    # BUG FIX: a stray logger.info(...) call previously appeared BEFORE the
    # docstring, which both demoted the docstring to a dead string statement
    # and logged the same message twice (once at INFO, once at DEBUG).
    logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
    if not self._initialized:
        # Materialize derived fields lazily, exactly once, on first query.
        self._materialize_derivations()
        self._initialized = True
Expand Down
83 changes: 83 additions & 0 deletions src/linkml_store/api/stores/mongodb/mongodb_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,89 @@ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
del obj["_id"]
self._post_insert_hook(objs)


def index(self,
          objs: Union[OBJECT, List[OBJECT]],
          index_name: Optional[str] = None,
          replace: bool = False,
          unique: bool = False,
          **kwargs):
    """
    Create indexes on the collection.

    :param objs: Field(s) to index.
    :param index_name: Optional name for the index.
    :param replace: If True, the index will be dropped and recreated.
    :param unique: If True, creates a unique index (default: False).
    """
    if not isinstance(objs, list):
        objs = [objs]

    existing_indexes = self.mongo_collection.index_information()

    for field in objs:
        already_indexed = False
        stale_index_name = None

        # Scan every existing index and note whether this field is covered.
        for current_name, details in existing_indexes.items():
            covered_fields = [spec[0] for spec in details.get("key", [])]
            if field in covered_fields:
                already_indexed = True
                stale_index_name = current_name if replace else None

        # With replace=True, remove the old index before recreating it.
        if stale_index_name:
            self.mongo_collection.drop_index(stale_index_name)
            logging.debug(f"Dropped existing index: {stale_index_name}")

        if replace or not already_indexed:
            self.mongo_collection.create_index(field, name=index_name, unique=unique)
            logging.debug(f"Created new index: {index_name} on field {field}, unique={unique}")
        else:
            logging.debug(f"Index already exists for field {field}, skipping creation.")

def upsert(self,
           objs: Union[OBJECT, List[OBJECT]],
           filter_fields: List[str],
           update_fields: Optional[List[str]] = None,
           **kwargs):
    """
    Upsert one or more documents into the MongoDB collection.

    :param objs: The document(s) to insert or update.
    :param filter_fields: List of field names to use as the filter for matching existing documents.
    :param update_fields: List of field names to include in the update. If None, all fields are updated.
    :raises ValueError: if an object contains none of the filter fields.
    """
    if not isinstance(objs, list):
        objs = [objs]

    for obj in objs:
        # Build the match filter from whichever filter fields the object carries.
        filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
        if not filter_criteria:
            raise ValueError("At least one valid filter field must be present in each object.")

        # Check if a document already exists
        existing_doc = self.mongo_collection.find_one(filter_criteria)

        if existing_doc:
            # BUG FIX: update_fields=None previously crashed (iterating None);
            # the documented behavior is to update all fields in that case.
            fields_to_update = update_fields if update_fields is not None else list(obj.keys())
            # Update only the fields whose values actually changed.
            updates = {key: obj[key] for key in fields_to_update
                       if key in obj and obj[key] != existing_doc.get(key)}

            if updates:
                self.mongo_collection.update_one(filter_criteria, {"$set": updates})
                logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
            else:
                logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
        else:
            # Insert a new document
            self.mongo_collection.insert_one(obj)
            logging.debug(f"Inserted new document: {obj}")

def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
mongo_filter = self._build_mongo_filter(query.where_clause)
limit = limit or query.limit
Expand Down
10 changes: 7 additions & 3 deletions src/linkml_store/api/stores/mongodb/mongodb_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse

from pymongo import MongoClient
from pymongo.database import Database as NativeDatabase
Expand Down Expand Up @@ -38,10 +39,13 @@ def __init__(self, handle: Optional[str] = None, **kwargs):
@property
def _db_name(self) -> str:
if self.handle:
db = self.handle.split("/")[-1]
parsed_url = urlparse(self.handle)
path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
print(path_parts)
db_name = path_parts[0] if path_parts else "default"
else:
db = "default"
return db
db_name = "default"
return db_name

@property
def native_client(self) -> MongoClient:
Expand Down
1 change: 0 additions & 1 deletion tests/test_api/test_filesystem_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def test_insert_and_query(fmt):
assert query_result.rows[0]["name"] == "Alice"
assert query_result.rows[1]["name"] == "Jie"
assert set(query_result.rows[0].keys()) == {"name", "age", "occupation"}

query_result = collection.find({"age": {"$gte": 30}})

# Assert the query results
Expand Down
129 changes: 114 additions & 15 deletions tests/test_api/test_mongodb_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,20 @@

import pytest
import yaml

from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
from pymongo import MongoClient


@pytest.fixture(scope="module")
def mongodb_client():
    """Module-scoped MongoDB client; skips the module when no server is reachable."""
    client = MongoClient("mongodb://localhost:27017", serverSelectionTimeoutMS=2000)  # 2s timeout
    try:
        client.admin.command("ping")  # Check MongoDB connectivity
    except Exception:
        # BUG FIX: the original try wrapped the yield as well, so any exception
        # raised by a *test* was converted into a skip, and client.close() was
        # never reached on the failure path. Only the connectivity probe skips.
        client.close()
        pytest.skip("Skipping tests: MongoDB is not available.")
    yield client
    client.close()


@pytest.fixture(scope="function")
Expand All @@ -20,16 +25,80 @@ def mongodb_database(mongodb_client):
db.drop_collection("test_collection")


@pytest.fixture(scope="function")
def mongodb_collection(mongodb_client):
    """Fixture to provide a MongoDB test collection, ensuring database and collection creation if necessary."""
    if mongodb_client is None:
        pytest.skip("Skipping tests: MongoDB client is not available.")

    db_name = "test_db"
    collection_name = "test_collection"

    # Touch the database through a throwaway collection so it exists server-side.
    if db_name not in mongodb_client.list_database_names():
        bootstrap_db = mongodb_client[db_name]
        bootstrap_db.create_collection("temp_init_collection")
        bootstrap_db.drop_collection("temp_init_collection")  # Clean up temp collection

    db = mongodb_client[db_name]

    # Make sure the test collection itself exists before wrapping it.
    if collection_name not in db.list_collection_names():
        db.create_collection(collection_name)

    yield MongoDBCollection(name=collection_name, parent=db)

    # Teardown: drop the test collection so each test starts from a clean slate.
    db.drop_collection(collection_name)


def test_upsert_insert(mongodb_collection):
    """
    Upsert must insert a brand-new document when no match exists.
    """
    record = {"_id": "1", "name": "Alice", "age": 25, "occupation": "Engineer"}

    # No document with _id=1 exists yet, so this upsert should insert.
    mongodb_collection.upsert(record, filter_fields=["_id"])

    # The freshly inserted document should be retrievable with all its fields.
    stored = mongodb_collection.mongo_collection.find_one({"_id": "1"})
    assert stored is not None
    assert stored["name"] == "Alice"
    assert stored["age"] == 25
    assert stored["occupation"] == "Engineer"


def test_upsert_update(mongodb_collection):
    """
    Upsert must update the matched fields and leave all other fields untouched.
    """
    mongodb_collection.mongo_collection.insert_one(
        {"_id": "2", "name": "Bob", "age": 30, "occupation": "Builder"}
    )

    mongodb_collection.upsert({"_id": "2", "age": 35}, filter_fields=["_id"], update_fields=["age"])

    doc = mongodb_collection.mongo_collection.find_one({"_id": "2"})
    assert doc is not None
    assert doc["_id"] == "2"
    assert doc["age"] == 35  # Should be updated
    assert doc["name"] == "Bob"  # Should remain unchanged
    assert doc["occupation"] == "Builder"  # Should remain unchanged


@pytest.mark.parametrize("handle", ["mongodb://localhost:27017/test_db", None, "mongodb"])
@pytest.mark.integration
def test_insert_and_query(handle):
# Create a MongoDBDatabase instance
"""
Test inserting and querying documents in MongoDB.
"""
db = MongoDBDatabase(handle=handle)

# Create a collection
collection = db.create_collection("test_collection", recreate_if_exists=True)

# Insert a few documents
documents = [
{
"name": "Alice",
Expand All @@ -47,9 +116,7 @@ def test_insert_and_query(handle):
"age": 30,
"occupation": "Builder",
"foods": ["carrot", "date"],
"relationships": [
{"person": "Alice", "relation": "friend"},
],
"relationships": [{"person": "Alice", "relation": "friend"}],
"meta": {"date": "2021-01-01"},
},
{
Expand All @@ -71,11 +138,8 @@ def test_insert_and_query(handle):

query_result = collection.find()
assert query_result.num_rows == len(documents)

# Query the collection
query_result = collection.find({"age": {"$gte": 30}})

# Assert the query results
assert query_result.num_rows == 2
assert len(query_result.rows) == 2
assert query_result.rows[0]["name"] == "Bob"
Expand Down Expand Up @@ -142,3 +206,38 @@ def test_insert_and_query(handle):
where = {fc: v}
results = collection.find(where, limit=-1)
assert results.num_rows == c, f"where {where} failed to find expected"


@pytest.mark.parametrize("unique_flag", [False, True])
def test_index_creation(mongodb_collection, unique_flag):
    """Index creation should honour the unique flag and replace semantics."""
    field = "test_field"
    index_name = f"test_index_{'unique' if unique_flag else 'non_unique'}"

    # Start from a pristine collection: no indexes, no documents (a unique
    # index cannot be built over duplicate or null values).
    mongodb_collection.mongo_collection.drop_indexes()
    mongodb_collection.mongo_collection.delete_many({})

    # Seed distinct, non-null values so the unique variant can succeed.
    mongodb_collection.mongo_collection.insert_many(
        [{"_id": i, "test_field": f"value{i}"} for i in (1, 2, 3)]
    )

    mongodb_collection.index(field, index_name=index_name, replace=True, unique=unique_flag)

    index_info = mongodb_collection.mongo_collection.index_information()

    assert index_name in index_info, f"Index {index_name} was not created"

    if unique_flag:
        assert index_info[index_name]["unique"], f"Index {index_name} should be unique"
    else:
        assert not index_info[index_name].get("unique", False), \
            f"Index {index_name} should not be unique"
Loading
Loading