Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Databases object #339

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a3e35d8
Added DatabaseSources to forge.
crisely09 Oct 3, 2022
afd5ba5
Improved DatabaseSource class. Made possible to load mappings from lo…
crisely09 Oct 4, 2022
f03f941
Added option to retrieve mappings from bd_sources also from the forge.
crisely09 Oct 4, 2022
9ace785
Added option to display types of mappings, a model and a store to eac…
crisely09 Oct 5, 2022
1da8883
Updated notebook
crisely09 Oct 5, 2022
9b3c88a
Did a little clean-up of the notebook
crisely09 Oct 5, 2022
a3e4d74
Change SPARQLStore to Specialized UniProtStore. Added Service and spaq…
crisely09 Oct 10, 2022
3e002cd
Added SPARQLWrapper to dependency list.
crisely09 Oct 10, 2022
47f8b0c
Recreated the SPARQLStore, and added the specialized UniProtStore. St…
crisely09 Oct 11, 2022
5200b3b
Modified the search method of the sparql store. Resources are retriev…
crisely09 Oct 12, 2022
a01af1e
Updated notebook with search example.
crisely09 Oct 12, 2022
8d7bea7
Added a draft of UniProt mappings.
crisely09 Oct 12, 2022
686aea4
Make Databases be new archetype. Updated most methods, missing search…
crisely09 Oct 17, 2022
573ae50
Adding the missing file.
crisely09 Oct 18, 2022
2520b7f
Sorted mapping functions. Missing tests.
crisely09 Oct 19, 2022
79b44bc
Improved Protein mappings.
crisely09 Oct 19, 2022
2cfabf3
Implemented mappings methods in RdfModel and update Database classes …
crisely09 Oct 24, 2022
a7da898
Rebased and added unit tests for SPARQLStore and StoreDatabase.
crisely09 Oct 27, 2022
b837bc8
Added WebServiceDatabase. Missing tests and improved download methods.
crisely09 Nov 2, 2022
9433b1f
Adding missing file.
crisely09 Nov 2, 2022
761e114
Some changes.
crisely09 Nov 7, 2022
96c227c
Added DemoDB folder.
crisely09 Nov 7, 2022
ad6b5fd
Make types in db_sources optional
crisely09 Nov 7, 2022
539c75c
Some changes.
crisely09 Nov 8, 2022
264ba9a
Added initial unit tests for WebService Database.
crisely09 Nov 10, 2022
dd7d22d
Update tests for webservice databases.
crisely09 Nov 15, 2022
5fd39bc
Resolve conflicts with old methods
crisely09 Sep 18, 2023
0f64c3d
Merge branch 'master' into dev-resources
MFSY Oct 15, 2023
d94e5a7
Merge branch 'master' into dev-resources
crisely09 Oct 19, 2023
b26fefc
Add mapper and mapping methods to SPARQL store
crisely09 Oct 19, 2023
811ed11
Merge branch 'master' into dev-resources
crisely09 Oct 26, 2023
b443b1c
Small fixes
crisely09 Oct 26, 2023
3c6f952
Add self to tests in classes
crisely09 Oct 26, 2023
29b481e
Merge branch 'master' into dev-resources
crisely09 Oct 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Sorted mapping functions. Missing tests.
  • Loading branch information
crisely09 committed Sep 18, 2023

Verified

This commit was signed with the committer’s verified signature.
torkelrogstad Torkel Rogstad
commit 2520b7f8cfe0a4a1654866115e9bd3f6df6322a6
Empty file.
549 changes: 146 additions & 403 deletions examples/notebooks/getting-started/17 - Database-sources.ipynb

Large diffs are not rendered by default.

55 changes: 46 additions & 9 deletions kgforge/core/archetypes/database.py
Original file line number Diff line number Diff line change
@@ -15,14 +15,14 @@
from abc import ABC, abstractmethod
import json
from pathlib import Path
from typing import Any, Optional, Callable, Dict, List
from typing import Any, Optional, Callable, Dict, List, Union

from kgforge.core import Resource
from kgforge.core.commons.context import Context
from kgforge.core.archetypes import Mapping, Model
from kgforge.core.commons.attributes import repr_class
from kgforge.core.commons.exceptions import ConfigurationError
from kgforge.core.commons.execution import not_supported
from kgforge.core.commons.dictionaries import with_defaults
from kgforge.core.commons.imports import import_class
from kgforge.core.commons.dictionaries import with_defaults

@@ -35,9 +35,10 @@ class Database(ABC):
# POLICY Implementations should not add methods but private functions in the file.
# POLICY Implementations should pass tests/specializations/databases/test_databases.py.

def __init__(self, source: str, **config) -> None:
def __init__(self, forge : Optional["KnowledgeGraphForge"], source: str, **config) -> None:
# POLICY Resolver data access should be lazy, unless it takes less than a second.
# POLICY There could be data caching but it should be aware of changes made in the source.
self._forge: Optional["KnowledgeGraphForge"] = forge
# Model
model_config = config.pop("model")
if model_config.get('origin') == 'directory':
@@ -51,9 +52,10 @@ def __init__(self, source: str, **config) -> None:
with open(context_path, 'r') as jfile:
context_file = json.load(jfile)
self.context = Context(context_file, iri)
if 'model_context' not in config:
config['model_context'] = self.context
except Exception:
self.context = None
config['model_context'] = self.context
elif model_config["origin"] == "store":
with_defaults(
model_config,
@@ -65,7 +67,8 @@ def __init__(self, source: str, **config) -> None:
model_name = model_config.pop("name")
model = import_class(model_name, "models")
self._model: Model = model(**model_config)
config['model_context'] = self._model.context()
if 'model_context' not in config:
config['model_context'] = self._model.context()
else:
raise NotImplementedError('DB Model not yet implemented.')
self.source: str = source
@@ -74,10 +77,6 @@ def __init__(self, source: str, **config) -> None:
def __repr__(self) -> str:
return repr_class(self)

def datatypes(self):
# TODO: add other datatypes used, for instance, inside the mappings
return self.mappings(pretty=False).keys()

def _mappings(self) -> Dict[str, List[str]]:
try:
dirpath = Path(self._dirpath, "mappings")
@@ -107,6 +106,42 @@ def mapping(self, entity: str, type: Callable) -> Mapping:
except AttributeError:
raise ConfigurationError('No directory path was found from the configuration.')

def map_resources(self, resources: Union[List[Resource], Resource],
                  resource_type: Optional[str] = None) -> Optional[Union[Resource, List[Resource]]]:
    """Map resources using the mapping declared for their type.

    :param resources: a single Resource or a list of Resources
    :param resource_type: type applied to every resource; when None, the
        ``type`` attribute of each resource is used instead
    :return: the mapped resources, in input order. A resource whose type cannot
        be determined, or for which no mapping is declared, is returned
        unchanged. A single Resource input yields a single result.
    """
    datatypes = self.types
    mappings = self.mappings()
    is_single = isinstance(resources, Resource)
    if is_single:
        resources = [resources]
    mapped_resources = []
    for resource in resources:
        # Resolve the type per resource: overwriting `resource_type` here would
        # leak the first resource's type to every following resource.
        current_type = resource_type
        if current_type is None:
            current_type = getattr(resource, "type", None)
        # NOTE(review): a resource carrying several types (a list) is not
        # handled here - confirm whether that can occur for these sources.
        if current_type is not None and current_type in datatypes:
            mapping_class: Mapping = import_class(mappings[current_type][0], "mappings")
            mapping = self.mapping(current_type, mapping_class)
            mapped_resources.append(self._forge.map(self._forge.as_json(resource), mapping))
        else:
            # Exactly one append per resource (untyped resources were
            # previously appended twice).
            mapped_resources.append(resource)
    return mapped_resources[0] if is_single else mapped_resources

def datatypes(self):
# TODO: add other datatypes used, for instance, inside the mappings
return list(self.mappings().keys())

@abstractmethod
def search(self, resolvers, *filters, **params) -> Resource:
pass

@abstractmethod
def sparql(self, query: str, debug: bool = False, limit: Optional[int] = None,
offset: Optional[int] = None,**params) -> Resource:
pass

@abstractmethod
def elastic(self, **params) -> Resource:
pass

@property
@abstractmethod
def health(self) -> Callable:
@@ -127,6 +162,8 @@ def _initialize_service(self, source: str, **source_config) -> Any:
return self._service_from_web_service(source, **source_config)
elif origin == "store":
store = import_class(source, "stores")
if source != 'DemoStore':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need this check?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need this check?

source_config['store_context'] = self.context
return self._service_from_store(store, **source_config)
else:
raise ConfigurationError(f"unrecognized DataBase origin '{origin}'")
2 changes: 1 addition & 1 deletion kgforge/core/archetypes/store.py
Original file line number Diff line number Diff line change
@@ -600,7 +600,7 @@ def replace(match: Match) -> str:
return f"{pfx}\n{qr}"


def build_construct_query(data, context):
def resources_from_construct_query(data, context):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fun, we have the same one!

def build_resource_from_construct_query(results: List, context: Context) -> List[Resource]:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ha! I guess it was a "clear need"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for this and the next, what would be the cleanest solution? I would like to use your code as it looks more organized, and I agree it fits well in the SPARQLQueryBuilder

Copy link
Contributor

@ssssarah ssssarah Oct 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cleanest solution in what sense?

(I've put the resource building methods in SparqlQueryBuilder following @MFSY 's comment, but I am of the opinion that there should be another Class for sparql response parsing, as it has nothing to do with query building. I guess the query builder can be treated as a set of helper functions for sparql, in which case maybe renaming it would be better)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the latter is what is happening. I would also not mind having a separate class just for parsing.
The question is ... should I keep things as I have, and whenever your branch is implemented I make another PR to use it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm not too sure about that. That's what I meant on Slack: I'd be reluctant to start an implementation for the read-only store, because it can benefit from the work in both of our branches.

Copy link
Contributor

@ssssarah ssssarah Oct 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a matter of which is merged first. But I don't think you should try to make the changes on this branch fit the ones on mine; that will happen when either of the branches needs to rebase onto master.

subject_triples = {}
for r in data["results"]["bindings"]:
subject = r["subject"]["value"]
41 changes: 25 additions & 16 deletions kgforge/core/forge.py
Original file line number Diff line number Diff line change
@@ -524,21 +524,23 @@ def db_sources(self, mappings: Optional[List[str]] = None,
:param pretty: a boolean
:return: Optional[List[str]]
"""
if mappings is not None:
if mappings is None:
sources = self._db_sources
else:
sources = {}
if isinstance(mappings, list):
for type in mappings:
for db in self._db_sources:
if type in self._db_sources[db].datatypes():
types = self._db_sources[db].datatypes()
if type in types:
sources[db] = self._db_sources[db]
else:
for db in self._db_sources:
if mappings in self._db_sources[db].datatypes():
types = self._db_sources[db].datatypes()
if mappings in types:
sources[db] = self._db_sources[db]
if not sources:
print("No Database sources were found for the given datatype(s)")
else:
sources = self._db_sources
print("No Database sources were found for the given type(s)")
if pretty:
print(*["Available Database sources:", *sources], sep="\n")
else:
@@ -672,7 +674,10 @@ def search(self, *filters, **params) -> List[Resource]:
)
db_source = params.pop('db_source', None)
if db_source:
return self._db_sources[db_source].search(resolvers, *filters, **params)
if db_source in self.db_sources():
return self._db_sources[db_source].search(resolvers, *filters, **params)
else:
raise AttributeError('Selected database was not declared within forge.')
else:
return self._store.search(resolvers, *filters, **params)

@@ -697,7 +702,10 @@ def sparql(
"""
db_source = params.pop('db_source', None)
if db_source:
return self._db_sources[db_source].sparql(query, debug, limit, offset, **params)
if db_source in self.db_sources():
return self._db_sources[db_source].sparql(query, debug, limit, offset, **params)
else:
raise AttributeError('Selected database was not declared within forge.')
else:
return self._store.sparql(query, debug, limit, offset, **params)

@@ -720,7 +728,10 @@ def elastic(
:return: List[Resource]
"""
if db_source:
return self._db_sources[db_source].elastic(query, debug, limit, offset)
if db_source in self.db_sources():
return self._db_sources[db_source].elastic(query, debug, limit, offset)
else:
raise AttributeError('Selected database was not declared within forge.')
else:
return self._store.elastic(query, debug, limit, offset)

@@ -992,7 +1003,7 @@ def get_model_context(self):

def create_db_sources(self, all_config: Optional[Dict[str, Dict[str, str]]],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be called anywhere?

store_config : Optional[Dict[str, Dict[str, str]]],
model_config : Optional[Dict[str, Dict[str, str]]]
model_context: Context
) -> Union[Database, List[Database]]:
crisely09 marked this conversation as resolved.
Show resolved Hide resolved
names = all_config.keys()
dbs = {}
@@ -1001,17 +1012,15 @@ def create_db_sources(self, all_config: Optional[Dict[str, Dict[str, str]]],
origin = config.get('origin')
if origin == 'store':
source = config.get('source')
# Provide store and model configuration to the database sources
if "model" not in config:
config.update(model=deepcopy(model_config))
# Complete configuration of the db store in case is the same
if source == store_config['name']:
# Reuse complete configuration of the store when Nexus is called
if source == store_config['name'] == 'BlueBrainNexus':
store_copy = deepcopy(store_config)
with_defaults(config, store_copy,
"source", "name",
store_copy.keys())
crisely09 marked this conversation as resolved.
Show resolved Hide resolved
config['model_context'] = model_context
config.update(origin=origin)
config.update(origin=origin)
print('Configuration', config)
config['name'] = name
dbs[name] = StoreDatabase(self, **config)
else:
70 changes: 53 additions & 17 deletions kgforge/specializations/databases/store_database.py
Original file line number Diff line number Diff line change
@@ -18,11 +18,12 @@
import copy
from typing import Callable, Optional, Union, Dict, List, Any

from kgforge.core.archetypes import Mapping, Store, Database
from kgforge.core.commons.exceptions import ConfigurationError
from kgforge.core import Resource
from kgforge.core.archetypes import Store, Database
from kgforge.core.commons.execution import not_supported
from kgforge.core.commons.dictionaries import with_defaults
from kgforge.core.commons.imports import import_class
from kgforge.core.wrappings.paths import FilterOperator
from kgforge.specializations.mappers.dictionaries import DictionaryMapper
from kgforge.specializations.stores.bluebrain_nexus import BlueBrainNexus


class StoreDatabase(Database):
@@ -31,13 +32,13 @@ class StoreDatabase(Database):

_REQUIRED = ("name", "origin", "source", "model")

def __init__(self, forge: Optional["KnowledgeGraphForge"], type: str = "Database",
def __init__(self, forge: Optional["KnowledgeGraphForge"],
**config) -> None:
"""
The properties defining the StoreDatabase are:

:param forge: To use forge utilities
name: <the name of the database> - REQUIRED
type: Database - REQUIRED
origin: <'store'> - REQUIRED
source: <a directory path, an URL, or the class name of a Store> - REQUIRED
bucket: <when 'origin' is 'store', a Store bucket>
@@ -52,27 +53,52 @@ def __init__(self, forge: Optional["KnowledgeGraphForge"], type: str = "Database
"""
self._check_properties(**config)
self.name = config.pop('name')
self.type: str = type
self._forge: Optional["KnowledgeGraphForge"] = forge
source = config.pop('source')
super().__init__(source, **config)
super().__init__(forge, source, **config)

def _check_properties(self, **info):
properties = info.keys()
for r in self._REQUIRED:
if r not in properties:
raise ValueError(f'Missing {r} from the properties to define the DatabasSource')
crisely09 marked this conversation as resolved.
Show resolved Hide resolved

def datatypes(self):
@property
def types(self):
# TODO: add other datatypes used, for instance, inside the mappings
return self.mappings().keys()

def search(self, *filters, **params) -> Any:
self.service.search(*filters, **params)

def sparql(self, query: str, debug: bool, limit: int = None, offset: int = None, **params) -> Any:
self.service.sparql(query, debug, limit, offset, **params)
def search(self, resolvers, *filters, **params):
    """Search within the database.

    :param resolvers: forwarded unchanged to the search of the underlying service
    :param filters: search filters, forwarded unchanged to the underlying service
    :param keep_original: bool (default True), removed from ``params`` before
        forwarding; when True the results of the service are returned as they are
    :return: the results of the service, mapped with the database mappings only
        when ``keep_original`` is False and the service is not a BlueBrainNexus store
    """
    keep_original = params.pop('keep_original', True)
    unmapped_resources = self.service.search(resolvers, *filters, **params)
    if isinstance(self.service, BlueBrainNexus) or keep_original:
        return unmapped_resources
    # Try to find the type of the resources within the filters; without any
    # filter there is nothing to inspect, so let map_resources fall back to
    # the type carried by each resource.
    resource_type = type_from_filters(filters) if filters else None
    return self.map_resources(unmapped_resources, resource_type=resource_type)

def sparql(self, query: str, debug: bool = False, limit: Optional[int] = None,
offset: Optional[int] = None,**params):
"""Use SPARQL within the database.

:param keep_original: bool
"""
keep_original = params.pop('keep_original', True)
unmapped_resources = self.service.sparql(query, debug, limit, offset, **params)
if keep_original:
return unmapped_resources
else:
return self.map_resources(unmapped_resources)

def elastic(self, *args, **params):
    """Elasticsearch queries are not available for store databases.

    Positional arguments are accepted because the forge calls this method as
    ``elastic(query, debug, limit, offset)``; without ``self`` and ``*args``
    the call failed with a TypeError before reaching ``not_supported()``.
    """
    not_supported()

@staticmethod
def _service_from_directory(dirpath: Path, **source_config) -> Any:
not_supported()
@@ -83,9 +109,19 @@ def _service_from_web_service(endpoint: str, **source_config) -> Any:

@staticmethod
def _service_from_store(store: Callable, **store_config) -> Store:
# Store.
print('store config', store_config)
return store(**store_config)

def health(self) -> Callable:
not_supported()

def type_from_filters(filters):
    """Extract the resource type requested by a set of search filters.

    :param filters: sequence of filters; either a dictionary as first element,
        or objects exposing ``path``, ``operator`` and ``value``
    :return: the value of the 'type' entry of the first dictionary, or the value
        of the first filter whose path contains 'type' with an EQUAL operator;
        None when no type is found or when no filter is given
    """
    # Guard against an empty sequence: indexing filters[0] would raise IndexError.
    if not filters:
        return None
    first = filters[0]
    if isinstance(first, dict):
        return first.get('type')
    for candidate in filters:
        if 'type' in candidate.path and candidate.operator is FilterOperator.EQUAL:
            return candidate.value
    return None
4 changes: 2 additions & 2 deletions kgforge/specializations/stores/bluebrain_nexus.py
Original file line number Diff line number Diff line change
@@ -64,7 +64,7 @@
from kgforge.specializations.mappers import DictionaryMapper
from kgforge.specializations.mappings import DictionaryMapping
from kgforge.specializations.stores.nexus.service import BatchAction, Service, _error_message
from kgforge.core.archetypes.store import build_construct_query
from kgforge.core.archetypes.store import resources_from_construct_query
from kgforge.core.commons.es_query_builder import ESQueryBuilder

class CategoryDataType(Enum):
@@ -876,7 +876,7 @@ def _sparql(self, query: str) -> List[Resource]:
_, q_comp = Query.parseString(query)
if q_comp.name == "ConstructQuery":
context = self.model_context or context
return build_construct_query(data, context)
return resources_from_construct_query(data, context)
else:
# SELECT QUERY
results = data["results"]["bindings"]
Loading