From fe2125a4632255bdd978ca151f203b109eb1c005 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Tue, 21 Jan 2025 12:20:14 +0100 Subject: [PATCH 01/15] Added id as optional string to definitions Added extraction of id after instantiation as the class name Fix testing --- bam_masterdata/metadata/definitions.py | 31 ++++++++++++++- tests/metadata/test_definitions.py | 52 +++++++++++++++++--------- tests/metadata/test_entities.py | 3 +- 3 files changed, 66 insertions(+), 20 deletions(-) diff --git a/bam_masterdata/metadata/definitions.py b/bam_masterdata/metadata/definitions.py index e909647..6496a78 100644 --- a/bam_masterdata/metadata/definitions.py +++ b/bam_masterdata/metadata/definitions.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field, field_validator, model_validator +from bam_masterdata.utils import code_to_class_name + class DataType(str, Enum): """Enumeration of the data types available in openBIS.""" @@ -74,6 +76,14 @@ class EntityDef(BaseModel): """, ) + id: Optional[str] = Field( + default=None, + description=""" + Identifier of the entity defined as the class name and used to serialize the entity definitions + in other formats. + """, + ) + # TODO check ontology_id, ontology_version, ontology_annotation_id, internal (found in the openBIS docu) @field_validator("code") @@ -115,7 +125,26 @@ def excel_headers(self) -> list[str]: """ Returns the headers for the entity in a format suitable for the openBIS Excel file. """ - return [k.capitalize().replace("_", " ") for k in self.model_fields.keys()] + return [ + k.capitalize().replace("_", " ") + for k in self.model_fields.keys() + if k != "id" + ] + + @model_validator(mode="after") + @classmethod + def model_id(cls, data: Any) -> Any: + """ + Stores the model `id` as the class name from the `code` field. + + Args: + data (Any): The data containing the fields values to validate. + + Returns: + Any: The data with the validated fields. + """ + data.id = code_to_class_name(data.code) + return data class BaseObjectTypeDef(EntityDef): diff --git a/tests/metadata/test_definitions.py b/tests/metadata/test_definitions.py index 88f55dc..ba20ff8 100644 --- a/tests/metadata/test_definitions.py +++ b/tests/metadata/test_definitions.py @@ -47,32 +47,33 @@ def test_fields(self): """Test the existing defined fields of the `EntityDef` class.""" names = list(EntityDef.model_fields.keys()) field_types = [val.annotation for val in list(EntityDef.model_fields.values())] - assert names == ["code", "description"] - assert field_types == [str, str] + assert names == ["code", "description", "id"] + assert field_types == [str, str, Optional[str]] @pytest.mark.parametrize( - "code, description, is_valid", + "code, description, id, is_valid", [ # `code` in capital and separated by underscores - ("EXPERIMENTAL_STEP", "Valid description", True), + ("EXPERIMENTAL_STEP", "Valid description", "ExperimentalStep", True), # `code` starting with $ symbol - ("$NAME", "Valid description", True), + ("$NAME", "Valid description", "Name", True), # `code` separating inheritance with points - ("WELDING_EQUIPMENT.INSTRUMENT", "Valid description", True), + ("WELDING_EQUIPMENT.INSTRUMENT", "Valid description", "Instrument", True), # Invalid `code` - ("INVALID CODE", "Valid description", False), + ("INVALID CODE", "Valid description", None, False), # `description` is not a string - ("EXPERIMENTAL_STEP", 2, False), + ("EXPERIMENTAL_STEP", 2, None, False), # Empty `code` - ("", "Valid description", False), + ("", "Valid description", "", False), ], ) - def test_entity_def(self, code: str, description: str, is_valid: bool): + def test_entity_def(self, code: str, description: str, id: str, is_valid: bool): """Test creation of `EntityDef` and field validation.""" if is_valid: entity = EntityDef(code=code, description=description) assert entity.code == code assert entity.description == description + assert entity.id == id else: with pytest.raises(ValueError): EntityDef(code=code, description=description) @@ -116,8 +117,8 @@ def test_fields(self): field_types = [ val.annotation for val in list(BaseObjectTypeDef.model_fields.values()) ] - assert names == ["code", "description", "validation_script"] - assert field_types == [str, str, Optional[str]] + assert names == ["code", "description", "id", "validation_script"] + assert field_types == [str, str, Optional[str], Optional[str]] class TestCollectionTypeDef: @@ -127,8 +128,8 @@ def test_fields(self): field_types = [ val.annotation for val in list(CollectionTypeDef.model_fields.values()) ] - assert names == ["code", "description", "validation_script"] - assert field_types == [str, str, Optional[str]] + assert names == ["code", "description", "id", "validation_script"] + assert field_types == [str, str, Optional[str], Optional[str]] class TestDatasetTypeDef: @@ -141,6 +142,7 @@ def test_fields(self): assert names == [ "code", "description", + "id", "validation_script", "main_dataset_pattern", "main_dataset_path", @@ -151,6 +153,7 @@ def test_fields(self): Optional[str], Optional[str], Optional[str], + Optional[str], ] @@ -164,11 +167,19 @@ def test_fields(self): assert names == [ "code", "description", + "id", "validation_script", "generated_code_prefix", "auto_generated_codes", ] - assert field_types == [str, str, Optional[str], Optional[str], bool] + assert field_types == [ + str, + str, + Optional[str], + Optional[str], + Optional[str], + bool, + ] @pytest.mark.parametrize( "code, generated_code_prefix, result", @@ -202,6 +213,7 @@ def test_fields(self): assert names == [ "code", "description", + "id", "property_label", "data_type", "vocabulary_code", @@ -212,6 +224,7 @@ def test_fields(self): assert field_types == [ str, str, + Optional[str], str, DataType, Optional[str], @@ -231,6 +244,7 @@ def test_fields(self): assert names == [ "code", "description", + "id", "property_label", "data_type", "vocabulary_code", @@ -246,6 +260,7 @@ def test_fields(self): assert field_types == [ str, str, + Optional[str], str, DataType, Optional[str], @@ -267,8 +282,8 @@ def test_fields(self): field_types = [ val.annotation for val in list(VocabularyTypeDef.model_fields.values()) ] - assert names == ["code", "description", "url_template"] - assert field_types == [str, str, Optional[str]] + assert names == ["code", "description", "id", "url_template"] + assert field_types == [str, str, Optional[str], Optional[str]] class TestVocabularyTerm: @@ -281,8 +296,9 @@ def test_fields(self): assert names == [ "code", "description", + "id", "url_template", "label", "official", ] - assert field_types == [str, str, Optional[str], str, bool] + assert field_types == [str, str, Optional[str], Optional[str], str, bool] diff --git a/tests/metadata/test_entities.py b/tests/metadata/test_entities.py index fefe255..f64e642 100644 --- a/tests/metadata/test_entities.py +++ b/tests/metadata/test_entities.py @@ -12,7 +12,7 @@ def test_model_to_json(self): entity = generate_base_entity() assert ( entity.model_to_json() - == '{"defs": {"code": "MOCKED_ENTITY", "description": "Mockup for an entity definition//Mockup f\\u00fcr eine Entit\\u00e4tsdefinition", "validation_script": null, "generated_code_prefix": "MOCKENT", "auto_generated_codes": true}}' + == '{"defs": {"code": "MOCKED_ENTITY", "description": "Mockup for an entity definition//Mockup f\\u00fcr eine Entit\\u00e4tsdefinition", "id": "MockedEntity", "validation_script": null, "generated_code_prefix": "MOCKENT", "auto_generated_codes": true}}' ) def test_model_to_dict(self): @@ -22,6 +22,7 @@ def test_model_to_dict(self): "defs": { "code": "MOCKED_ENTITY", "description": "Mockup for an entity definition//Mockup für eine Entitätsdefinition", + "id": "MockedEntity", "validation_script": None, "generated_code_prefix": "MOCKENT", "auto_generated_codes": True, From 17c32f6c6b8b390e0dfb89c0d556d6864c478caa Mon Sep 17 00:00:00 2001 From: jpizarro Date: Tue, 21 Jan 2025 17:13:50 +0100 Subject: [PATCH 02/15] Added rdflib to dependencies Added _base_attrs to BaseEntity property Added entities_to_rdf cli --- bam_masterdata/cli/cli.py | 61 ++++++++++++- bam_masterdata/cli/entities_to_rdf.py | 127 ++++++++++++++++++++++++++ bam_masterdata/metadata/entities.py | 118 +++++++++++++++++++----- pyproject.toml | 1 + 4 files changed, 284 insertions(+), 23 deletions(-) create mode 100644 bam_masterdata/cli/entities_to_rdf.py diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py index 529bd9f..a2dfc6b 100644 --- a/bam_masterdata/cli/cli.py +++ b/bam_masterdata/cli/cli.py @@ -6,9 +6,11 @@ import click from decouple import config as environ from openpyxl import Workbook +from rdflib import Graph from bam_masterdata.cli.entities_to_excel import entities_to_excel from bam_masterdata.cli.entities_to_json import entities_to_json +from bam_masterdata.cli.entities_to_rdf import entities_to_rdf from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator from bam_masterdata.logger import logger from bam_masterdata.utils import ( @@ -211,7 +213,7 @@ def export_to_excel(force_delete, python_path): definitions_module = import_module(module_path=str(definitions_path.resolve())) # Process the modules and save the entities to the openBIS masterdata Excel file - masterdata_file = os.path.join(".", "artifacts", "masterdata.xlsx") + masterdata_file = os.path.join(export_dir, "masterdata.xlsx") wb = Workbook() for i, module_path in enumerate(py_modules): if i == 0: @@ -234,5 +236,62 @@ def export_to_excel(force_delete, python_path): click.echo(f"All masterdata have been generated and saved to {masterdata_file}") +@cli.command( + name="export_to_rdf", + help="Export entities to a RDF/XML file in the path `./artifacts/bam_masterdata.owl`.", +) +@click.option( + "--force-delete", + type=bool, + required=False, + default=False, + help=""" + (Optional) If set to `True`, it will delete the current `./artifacts/` folder and create a new one. Default is `False`. + """, +) +@click.option( + "--python-path", + type=str, + required=False, + default=DATAMODEL_DIR, + help=""" + (Optional) The path to the individual Python module or the directory containing the Python modules to process the datamodel. + Default is `./bam_masterdata/datamodel/`. + """, +) +def export_to_rdf(force_delete, python_path): + # Get the directories from the Python modules and the export directory for the static artifacts + export_dir = os.path.join(".", "artifacts") + + # Delete and create the export directory + delete_and_create_dir( + directory_path=export_dir, + logger=logger, + force_delete=force_delete, + ) + + # Get the Python modules to process the datamodel + py_modules = listdir_py_modules(directory_path=python_path, logger=logger) + # ! Remove the module containing 'vocabulary_types.py' + py_modules = [ + module for module in py_modules if "vocabulary_types.py" not in module + ] + + # Process each module using the `to_json` method of each entity + graph = Graph() + for module_path in py_modules: + entities_to_rdf(graph=graph, module_path=module_path, logger=logger) + + # Saving RDF/XML to file + rdf_output = graph.serialize(format="pretty-xml") + masterdata_file = os.path.join(export_dir, "masterdata.owl") + with open(masterdata_file, "w") as f: + f.write(rdf_output) + + click.echo( + f"All masterdata has been generated in RDF/XML format and saved to {masterdata_file}" + ) + + if __name__ == "__main__": cli() diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py new file mode 100644 index 0000000..70e8069 --- /dev/null +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -0,0 +1,127 @@ +import inspect +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rdflib import Graph + from structlog._config import BoundLoggerLazyProxy + +import click +from rdflib import Literal, Namespace +from rdflib.namespace import DC, OWL, RDF, RDFS, SKOS + +from bam_masterdata.utils import import_module + +BAM = Namespace("http://bam.de/masterdata/") + + +def rdf_graph_init(g: "Graph") -> None: + # Adding base namespaces + g.bind("dc", DC) + g.bind("owl", OWL) + g.bind("rdf", RDF) + g.bind("rdfs", RDFS) + g.bind("skos", SKOS) + g.bind("bam", BAM) + + # Adding annotation properties from base namespaces + annotation_props = [ + DC.identifier, + DC.type, + SKOS.altLabel, + SKOS.definition, + SKOS.prefLabel, + ] + for prop in annotation_props: + g.add((prop, RDF.type, OWL.AnnotationProperty)) + + # Internal BAM properties + # ? `section`, `ordinal`, `show_in_edit_views`? + bam_props_uri = { + BAM["hasMandatoryProperty"]: [ + (RDF.type, OWL.ObjectProperty), + (RDFS.subPropertyOf, OWL.topObjectProperty), + (RDFS.domain, OWL.Thing), + (SKOS.prefLabel, Literal("hasMandatoryProperty", lang="en")), + ], + BAM["hasOptionalProperty"]: [ + (RDF.type, OWL.ObjectProperty), + (RDFS.subPropertyOf, OWL.topObjectProperty), + (RDFS.domain, OWL.Thing), + (SKOS.prefLabel, Literal("hasOptionalProperty", lang="en")), + ], + } + for prop_uri, obj_properties in bam_props_uri.items(): + for prop in obj_properties: + g.add((prop_uri, prop[0], prop[1])) + + # Adding base entity types objects + for entity in ["PropertyType", "ObjectType", "CollectionType", "DatasetType"]: + entity_uri = BAM[entity] + g.add((entity_uri, RDF.type, OWL.Class)) + g.add((entity_uri, SKOS.prefLabel, Literal(entity, lang="en"))) + + return g + + +def entities_to_rdf( + graph: "Graph", module_path: str, logger: "BoundLoggerLazyProxy" +) -> None: + rdf_graph_init(graph) + + module = import_module(module_path=module_path) + + # Special case of `PropertyTypeDef` in `property_types.py` + # PROPERTY TYPES + # skos:prefLabel used for class names + # skos:definition used for `description` (en, de) + # skos:altLabel used for `property_label` + # dc:identifier used for `code` # ! only defined for internal codes with $ symbol + # dc:type used for `data_type` + if "property_types.py" in module_path: + for name, obj in inspect.getmembers(module): + if name.startswith("_") or name == "PropertyTypeDef": + continue + prop_uri = BAM[obj.id] + + # Define the property as an OWL class inheriting from PropertyType + graph.add((prop_uri, RDF.type, OWL.Class)) + graph.add((prop_uri, RDFS.subClassOf, BAM.PropertyType)) + + # Add attributes like id, code, description in English and Deutsch, property_label, data_type + graph.add((prop_uri, SKOS.prefLabel, Literal(obj.id, lang="en"))) + graph.add((prop_uri, DC.identifier, Literal(obj.code))) + descriptions = obj.description.split("//") + if len(descriptions) > 1: + graph.add( + (prop_uri, SKOS.definition, Literal(descriptions[0], lang="en")) + ) + graph.add( + (prop_uri, SKOS.definition, Literal(descriptions[1], lang="de")) + ) + else: + graph.add( + (prop_uri, SKOS.definition, Literal(obj.description, lang="en")) + ) + graph.add((prop_uri, SKOS.altLabel, Literal(obj.property_label, lang="en"))) + graph.add((prop_uri, DC.type, Literal(obj.data_type.value))) + return None + + # All other datamodel modules + # OBJECT/DATASET/COLLECTION TYPES + # skos:prefLabel used for class names + # skos:definition used for `description` (en, de) + # dc:identifier used for `code` # ! only defined for internal codes with $ symbol + # parents defined from `code` + # assigned properties can be Mandatory or Optional, can be PropertyType or ObjectType + # ? For OBJECT TYPES + # ? `generated_code_prefix`, `auto_generated_codes`? + for name, obj in inspect.getmembers(module, inspect.isclass): + # Ensure the class has the `to_json` method + if not hasattr(obj, "defs") or not callable(getattr(obj, "to_rdf")): + continue + try: + # Instantiate the class and call the method + entity = obj() + entity.to_rdf(namespace=BAM, graph=graph) + except Exception as err: + click.echo(f"Failed to process class {name} in {module_path}: {err}") diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index fb97773..cda5927 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -1,7 +1,12 @@ import json -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from pydantic import BaseModel, ConfigDict, Field, model_validator +from rdflib import BNode, Literal +from rdflib.namespace import DC, OWL, RDF, RDFS, SKOS + +if TYPE_CHECKING: + from rdflib import Graph, Namespace from bam_masterdata.metadata.definitions import ( CollectionTypeDef, @@ -19,6 +24,32 @@ class BaseEntity(BaseModel): adding new methods that are useful for interfacing with openBIS. """ + @property + def cls_name(self) -> str: + """ + Returns the entity name of the class as a string to speed up checks. This is a property + to be overwritten by each of the abstract entity types. + """ + return self.__class__.__name__ + + @property + def _base_attrs(self) -> list: + """ + List of base properties or terms assigned to an entity type. This are the direct properties or terms + assigned when defining a new entity type. + """ + cls_attrs = self.__class__.__dict__ + base_attrs = [ + attr_name + for attr_name in cls_attrs + if not ( + attr_name.startswith("_") + or callable(cls_attrs[attr_name]) + or attr_name in ["defs", "model_config"] + ) + ] + return [getattr(self, attr_name) for attr_name in base_attrs] + def model_to_json(self, indent: Optional[int] = None) -> str: """ Returns the model as a string in JSON format storing the data `defs` and the property or @@ -52,13 +83,56 @@ def model_to_dict(self) -> dict: dump_json = self.model_to_json() return json.loads(dump_json) - @property - def cls_name(self) -> str: - """ - Returns the entity name of the class as a string to speed up checks. This is a property - to be overwritten by each of the abstract entity types. - """ - return self.__class__.__name__ + # skos:prefLabel used for class names + # skos:definition used for `description` (en, de) + # dc:identifier used for `code` # ! only defined for internal codes with $ symbol + # parents defined from `code` + # assigned properties can be Mandatory or Optional, can be PropertyType or ObjectType + # ? For OBJECT TYPES + # ? `generated_code_prefix`, `auto_generated_codes`? + def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: + entity_uri = namespace[self.defs.id] + + # Define the entity as an OWL class inheriting from the specific namespace type + graph.add((entity_uri, RDF.type, OWL.Class)) + parent_classes = self.__class__.__bases__ + for parent_class in parent_classes: + if issubclass(parent_class, BaseEntity) and parent_class != BaseEntity: + parent_uri = namespace[parent_class.__name__] + graph.add((entity_uri, RDFS.subClassOf, parent_uri)) + + # Add attributes like id, code, description in English and Deutsch, property_label, data_type + graph.add((entity_uri, SKOS.prefLabel, Literal(self.defs.id, lang="en"))) + graph.add((entity_uri, DC.identifier, Literal(self.defs.code))) + descriptions = self.defs.description.split("//") + if len(descriptions) > 1: + graph.add( + (entity_uri, SKOS.definition, Literal(descriptions[0], lang="en")) + ) + graph.add( + (entity_uri, SKOS.definition, Literal(descriptions[1], lang="de")) + ) + else: + graph.add( + (entity_uri, SKOS.definition, Literal(self.defs.description, lang="en")) + ) + # Adding properties relationships to the entities + for assigned_prop in self._base_attrs: + prop_uri = namespace[assigned_prop.id] + restriction = BNode() + graph.add((restriction, RDF.type, OWL.Restriction)) + if assigned_prop.mandatory: + graph.add( + (restriction, OWL.onProperty, namespace["hasMandatoryProperty"]) + ) + else: + graph.add( + (restriction, OWL.onProperty, namespace["hasOptionalProperty"]) + ) + graph.add((restriction, OWL.someValuesFrom, prop_uri)) + + # Add the restriction as a subclass of the entity + graph.add((entity_uri, RDFS.subClassOf, restriction)) class ObjectType(BaseEntity): @@ -89,6 +163,13 @@ class ObjectType(BaseEntity): """, ) + @property + def cls_name(self) -> str: + """ + Returns the entity name of the class as a string. + """ + return "ObjectType" + @model_validator(mode="after") @classmethod def model_validator_after_init(cls, data: Any) -> Any: @@ -109,13 +190,6 @@ def model_validator_after_init(cls, data: Any) -> Any: return data - @property - def cls_name(self) -> str: - """ - Returns the entity name of the class as a string. - """ - return "ObjectType" - class VocabularyType(BaseEntity): """ @@ -135,6 +209,13 @@ class VocabularyType(BaseEntity): """, ) + @property + def cls_name(self) -> str: + """ + Returns the entity name of the class as a string. + """ + return "VocabularyType" + @model_validator(mode="after") @classmethod def model_validator_after_init(cls, data: Any) -> Any: @@ -155,13 +236,6 @@ def model_validator_after_init(cls, data: Any) -> Any: return data - @property - def cls_name(self) -> str: - """ - Returns the entity name of the class as a string. - """ - return "VocabularyType" - class CollectionType(ObjectType): @property diff --git a/pyproject.toml b/pyproject.toml index 29ee93a..5595b93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "openpyxl", "click", "pydantic", + "rdflib", ] [project.urls] From 7e9e95faa994828a88657a831161654ab6acdc3d Mon Sep 17 00:00:00 2001 From: jpizarro Date: Wed, 22 Jan 2025 09:21:59 +0100 Subject: [PATCH 03/15] Adding referenceTo and fixing properties of data type OBJECT --- bam_masterdata/cli/entities_to_rdf.py | 37 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index 70e8069..be16ea6 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -6,10 +6,10 @@ from structlog._config import BoundLoggerLazyProxy import click -from rdflib import Literal, Namespace +from rdflib import BNode, Literal, Namespace from rdflib.namespace import DC, OWL, RDF, RDFS, SKOS -from bam_masterdata.utils import import_module +from bam_masterdata.utils import code_to_class_name, import_module BAM = Namespace("http://bam.de/masterdata/") @@ -39,20 +39,26 @@ def rdf_graph_init(g: "Graph") -> None: bam_props_uri = { BAM["hasMandatoryProperty"]: [ (RDF.type, OWL.ObjectProperty), - (RDFS.subPropertyOf, OWL.topObjectProperty), - (RDFS.domain, OWL.Thing), + (RDFS.domain, BAM.ObjectType), + (RDFS.range, BAM.PropertyType), (SKOS.prefLabel, Literal("hasMandatoryProperty", lang="en")), ], BAM["hasOptionalProperty"]: [ (RDF.type, OWL.ObjectProperty), - (RDFS.subPropertyOf, OWL.topObjectProperty), - (RDFS.domain, OWL.Thing), + (RDFS.domain, BAM.ObjectType), + (RDFS.range, BAM.PropertyType), (SKOS.prefLabel, Literal("hasOptionalProperty", lang="en")), ], + BAM["referenceTo"]: [ + (RDF.type, OWL.ObjectProperty), + (RDFS.domain, BAM.PropertyType), # Restricting domain to PropertyType + (RDFS.range, BAM.ObjectType), # Explicitly setting range to ObjectType + (SKOS.prefLabel, Literal("referenceTo", lang="en")), + ], } for prop_uri, obj_properties in bam_props_uri.items(): - for prop in obj_properties: - g.add((prop_uri, prop[0], prop[1])) + for prop in obj_properties: # type: ignore + g.add((prop_uri, prop[0], prop[1])) # type: ignore # Adding base entity types objects for entity in ["PropertyType", "ObjectType", "CollectionType", "DatasetType"]: @@ -60,8 +66,6 @@ def rdf_graph_init(g: "Graph") -> None: g.add((entity_uri, RDF.type, OWL.Class)) g.add((entity_uri, SKOS.prefLabel, Literal(entity, lang="en"))) - return g - def entities_to_rdf( graph: "Graph", module_path: str, logger: "BoundLoggerLazyProxy" @@ -104,6 +108,19 @@ def entities_to_rdf( ) graph.add((prop_uri, SKOS.altLabel, Literal(obj.property_label, lang="en"))) graph.add((prop_uri, DC.type, Literal(obj.data_type.value))) + if obj.data_type.value == "OBJECT": + # entity_ref_uri = BAM[code_to_class_name(obj.object_code)] + # graph.add((prop_uri, BAM.referenceTo, entity_ref_uri)) + entity_ref_uri = BAM[code_to_class_name(obj.object_code)] + + # Create a restriction with referenceTo + restriction = BNode() + graph.add((restriction, RDF.type, OWL.Restriction)) + graph.add((restriction, OWL.onProperty, BAM["referenceTo"])) + graph.add((restriction, OWL.someValuesFrom, entity_ref_uri)) + + # Add the restriction as a subclass of the property + graph.add((prop_uri, RDFS.subClassOf, restriction)) return None # All other datamodel modules From 9780764c34f406dd275b37247da937cf0e723cc8 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 23 Jan 2025 11:05:07 +0100 Subject: [PATCH 04/15] Ignoring typing in to_rdf function --- bam_masterdata/metadata/entities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index cda5927..715f3a8 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -1,5 +1,5 @@ import json -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, no_type_check from pydantic import BaseModel, ConfigDict, Field, model_validator from rdflib import BNode, Literal @@ -90,6 +90,7 @@ def model_to_dict(self) -> dict: # assigned properties can be Mandatory or Optional, can be PropertyType or ObjectType # ? For OBJECT TYPES # ? `generated_code_prefix`, `auto_generated_codes`? + @no_type_check def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: entity_uri = namespace[self.defs.id] From 48b9c3a2c7a7e8465c5bfc69177fa13fcc57229b Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 23 Jan 2025 12:57:06 +0100 Subject: [PATCH 05/15] Fix pydantic versioning problem for _base_attrs Fix encoding in rdf file --- bam_masterdata/cli/cli.py | 2 +- bam_masterdata/metadata/entities.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py index a2dfc6b..b9d1535 100644 --- a/bam_masterdata/cli/cli.py +++ b/bam_masterdata/cli/cli.py @@ -285,7 +285,7 @@ def export_to_rdf(force_delete, python_path): # Saving RDF/XML to file rdf_output = graph.serialize(format="pretty-xml") masterdata_file = os.path.join(export_dir, "masterdata.owl") - with open(masterdata_file, "w") as f: + with open(masterdata_file, "w", encoding="utf-8") as f: f.write(rdf_output) click.echo( diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index 715f3a8..75ce69d 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -45,7 +45,8 @@ def _base_attrs(self) -> list: if not ( attr_name.startswith("_") or callable(cls_attrs[attr_name]) - or attr_name in ["defs", "model_config"] + or attr_name + in ["defs", "model_config", "model_fields", "model_computed_fields"] ) ] return [getattr(self, attr_name) for attr_name in base_attrs] From 7fc6eb8e0b476f2f27741f8c4ea41841f83229f9 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 23 Jan 2025 15:28:36 +0100 Subject: [PATCH 06/15] Added dataType and propertyLabel annotations Using only rdfs and dc for annotations Restricted pydantic version --- bam_masterdata/cli/entities_to_rdf.py | 52 +++++++++++++++++---------- bam_masterdata/metadata/entities.py | 12 +++---- pyproject.toml | 2 +- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index be16ea6..213b781 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -25,15 +25,33 @@ def rdf_graph_init(g: "Graph") -> None: # Adding annotation properties from base namespaces annotation_props = [ + RDFS.label, + RDFS.comment, DC.identifier, - DC.type, - SKOS.altLabel, - SKOS.definition, - SKOS.prefLabel, ] for prop in annotation_props: g.add((prop, RDF.type, OWL.AnnotationProperty)) + # Custom annotation properties from openBIS: `dataType`, `propertyLabel + custom_annotation_props = { + BAM[ + "dataType" + ]: "Defines the data type of the property type in openBIS (VARCHAR, OBJECT, CONTROLLEDVOCABULARY)", + BAM[ + "propertyLabel" + ]: "A human-readable label used to identify the property in user interfaces or documentation of openBIS.", + } + for custom_prop, custom_prop_def in custom_annotation_props.items(): + g.add((custom_prop, RDF.type, OWL.AnnotationProperty)) + g.add( + ( + custom_prop, + RDFS.label, + Literal(f"obis:{custom_prop.split('/')[-1]}", lang="en"), + ) + ) + g.add((custom_prop, RDFS.comment, Literal(custom_prop_def, lang="en"))) + # Internal BAM properties # ? `section`, `ordinal`, `show_in_edit_views`? bam_props_uri = { @@ -47,13 +65,13 @@ def rdf_graph_init(g: "Graph") -> None: (RDF.type, OWL.ObjectProperty), (RDFS.domain, BAM.ObjectType), (RDFS.range, BAM.PropertyType), - (SKOS.prefLabel, Literal("hasOptionalProperty", lang="en")), + (RDFS.label, Literal("hasOptionalProperty", lang="en")), ], BAM["referenceTo"]: [ (RDF.type, OWL.ObjectProperty), (RDFS.domain, BAM.PropertyType), # Restricting domain to PropertyType (RDFS.range, BAM.ObjectType), # Explicitly setting range to ObjectType - (SKOS.prefLabel, Literal("referenceTo", lang="en")), + (RDFS.label, Literal("referenceTo", lang="en")), ], } for prop_uri, obj_properties in bam_props_uri.items(): @@ -64,7 +82,7 @@ def rdf_graph_init(g: "Graph") -> None: for entity in ["PropertyType", "ObjectType", "CollectionType", "DatasetType"]: entity_uri = BAM[entity] g.add((entity_uri, RDF.type, OWL.Class)) - g.add((entity_uri, SKOS.prefLabel, Literal(entity, lang="en"))) + g.add((entity_uri, RDFS.label, Literal(entity, lang="en"))) def entities_to_rdf( @@ -92,22 +110,18 @@ def entities_to_rdf( graph.add((prop_uri, RDFS.subClassOf, BAM.PropertyType)) # Add attributes like id, code, description in English and Deutsch, property_label, data_type - graph.add((prop_uri, SKOS.prefLabel, Literal(obj.id, lang="en"))) + graph.add((prop_uri, RDFS.label, Literal(obj.id, lang="en"))) graph.add((prop_uri, DC.identifier, Literal(obj.code))) descriptions = obj.description.split("//") if len(descriptions) > 1: - graph.add( - (prop_uri, SKOS.definition, Literal(descriptions[0], lang="en")) - ) - graph.add( - (prop_uri, SKOS.definition, Literal(descriptions[1], lang="de")) - ) + graph.add((prop_uri, RDFS.comment, Literal(descriptions[0], lang="en"))) + graph.add((prop_uri, RDFS.comment, Literal(descriptions[1], lang="de"))) else: - graph.add( - (prop_uri, SKOS.definition, Literal(obj.description, lang="en")) - ) - graph.add((prop_uri, SKOS.altLabel, Literal(obj.property_label, lang="en"))) - graph.add((prop_uri, DC.type, Literal(obj.data_type.value))) + graph.add((prop_uri, RDFS.comment, Literal(obj.description, lang="en"))) + graph.add( + (prop_uri, BAM.propertyLabel, Literal(obj.property_label, lang="en")) + ) + graph.add((prop_uri, BAM.dataType, Literal(obj.data_type.value))) if obj.data_type.value == "OBJECT": # entity_ref_uri = BAM[code_to_class_name(obj.object_code)] # graph.add((prop_uri, BAM.referenceTo, entity_ref_uri)) diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index 75ce69d..50b6069 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -104,19 +104,15 @@ def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: graph.add((entity_uri, RDFS.subClassOf, parent_uri)) # Add attributes like id, code, description in English and Deutsch, property_label, data_type - graph.add((entity_uri, SKOS.prefLabel, Literal(self.defs.id, lang="en"))) + graph.add((entity_uri, RDFS.label, Literal(self.defs.id, lang="en"))) graph.add((entity_uri, DC.identifier, Literal(self.defs.code))) descriptions = self.defs.description.split("//") if len(descriptions) > 1: - graph.add( - (entity_uri, SKOS.definition, Literal(descriptions[0], lang="en")) - ) - graph.add( - (entity_uri, SKOS.definition, Literal(descriptions[1], lang="de")) - ) + graph.add((entity_uri, RDFS.comment, Literal(descriptions[0], lang="en"))) + graph.add((entity_uri, RDFS.comment, Literal(descriptions[1], lang="de"))) else: graph.add( - (entity_uri, SKOS.definition, Literal(self.defs.description, lang="en")) + (entity_uri, RDFS.comment, Literal(self.defs.description, lang="en")) ) # Adding properties relationships to the entities for assigned_prop in self._base_attrs: diff --git a/pyproject.toml b/pyproject.toml index 5595b93..b49c0d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "pybis~=1.37.1rc4", "openpyxl", "click", - "pydantic", + "pydantic~=2.10.5", "rdflib", ] From 0a6f19ac009d152585ab098010494d64617c3a66 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Fri, 24 Jan 2025 10:03:17 +0100 Subject: [PATCH 07/15] Fix descriptions of annotated properties Deleted inheritance from ObjectType and other types --- bam_masterdata/cli/entities_to_rdf.py | 54 +++++++++++++++++++++------ bam_masterdata/metadata/entities.py | 12 +++++- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index 213b781..3ec514b 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -7,11 +7,12 @@ import click from rdflib import BNode, Literal, Namespace -from rdflib.namespace import DC, OWL, RDF, RDFS, SKOS +from rdflib.namespace import DC, OWL, RDF, RDFS from bam_masterdata.utils import code_to_class_name, import_module -BAM = Namespace("http://bam.de/masterdata/") +BAM = Namespace("https://bamresearch.github.io/bam-masterdata/") +PROV = Namespace("http://www.w3.org/ns/prov#") def rdf_graph_init(g: "Graph") -> None: @@ -20,8 +21,8 @@ def rdf_graph_init(g: "Graph") -> None: g.bind("owl", OWL) g.bind("rdf", RDF) g.bind("rdfs", RDFS) - g.bind("skos", SKOS) g.bind("bam", BAM) + g.bind("prov", PROV) # Adding annotation properties from base namespaces annotation_props = [ @@ -36,10 +37,32 @@ def rdf_graph_init(g: "Graph") -> None: custom_annotation_props = { BAM[ "dataType" - ]: "Defines the data type of the property type in openBIS (VARCHAR, OBJECT, CONTROLLEDVOCABULARY)", + ]: """Represents the data type of a property as defined in the openBIS platform. + This annotation is used to ensure alignment with the native data types in openBIS, + facilitating seamless integration and data exchange. + + The allowed values for this annotation correspond directly to the openBIS type system, + including BOOLEAN, CONTROLLEDVOCABULARY, DATE, HYPERLINK, INTEGER, MULTILINE_VARCHAR, OBJECT, + REAL, TIMESTAMP, VARCHAR, and XML. + + While `bam:dataType` is primarily intended for internal usage with openBIS, mappings to + standard vocabularies such as `xsd` (e.g., `xsd:boolean`, `xsd:string`) are possible to use and documented to + enhance external interoperability. The full mapping is: + - BOOLEAN: xsd:boolean + - CONTROLLEDVOCABULARY: xsd:string + - DATE: xsd:date + - HYPERLINK: xsd:anyURI + - INTEGER: xsd:integer + - MULTILINE_VARCHAR: xsd:string + - OBJECT: bam:ObjectType + - REAL: xsd:decimal + - TIMESTAMP: xsd:dateTime + - VARCHAR: xsd:string + - XML: xsd:string""", BAM[ "propertyLabel" - ]: "A human-readable label used to identify the property in user interfaces or documentation of openBIS.", + ]: """A UI-specific annotation used in openBIS to provide an alternative label for a property + displayed in the frontend. Not intended for semantic reasoning or interoperability beyond openBIS.""", } for custom_prop, custom_prop_def in custom_annotation_props.items(): g.add((custom_prop, RDF.type, OWL.AnnotationProperty)) @@ -47,7 +70,7 @@ def rdf_graph_init(g: "Graph") -> None: ( custom_prop, RDFS.label, - Literal(f"obis:{custom_prop.split('/')[-1]}", lang="en"), + Literal(f"bam:{custom_prop.split('/')[-1]}", lang="en"), ) ) g.add((custom_prop, RDFS.comment, Literal(custom_prop_def, lang="en"))) @@ -59,7 +82,7 @@ def rdf_graph_init(g: "Graph") -> None: (RDF.type, OWL.ObjectProperty), (RDFS.domain, BAM.ObjectType), (RDFS.range, BAM.PropertyType), - (SKOS.prefLabel, Literal("hasMandatoryProperty", lang="en")), + (RDFS.label, Literal("hasMandatoryProperty", lang="en")), ], BAM["hasOptionalProperty"]: [ (RDF.type, OWL.ObjectProperty), @@ -78,11 +101,18 @@ def rdf_graph_init(g: "Graph") -> None: for prop in obj_properties: # type: ignore g.add((prop_uri, prop[0], prop[1])) # type: ignore - # Adding base entity types objects - for entity in ["PropertyType", "ObjectType", "CollectionType", "DatasetType"]: - entity_uri = BAM[entity] - g.add((entity_uri, RDF.type, OWL.Class)) - g.add((entity_uri, RDFS.label, Literal(entity, lang="en"))) + # Adding base PropertyType object as a placeholder for all properties + prop_uri = BAM.PropertyType + g.add((prop_uri, RDF.type, OWL.Class)) + prop_type_description = """A conceptual placeholder used to define and organize properties as first-class entities. + PropertyType is used to place properties and define their metadata, separating properties from the + entities they describe. + + In integration scenarios: + - PropertyType can align with `BFO:Quality` for inherent attributes. + - PropertyType can represent `BFO:Role` if properties serve functional purposes. + - PropertyType can be treated as a `prov:Entity` when properties participate in provenance relationships.""" + g.add((prop_uri, RDFS.comment, Literal(prop_type_description, lang="en"))) def entities_to_rdf( diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index 50b6069..e8e92e0 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator from rdflib import BNode, Literal -from rdflib.namespace import DC, OWL, RDF, RDFS, SKOS +from rdflib.namespace import DC, OWL, RDF, RDFS if TYPE_CHECKING: from rdflib import Graph, Namespace @@ -100,6 +100,16 @@ def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: parent_classes = self.__class__.__bases__ for parent_class in parent_classes: if issubclass(parent_class, BaseEntity) and parent_class != BaseEntity: + if parent_class.__name__ in [ + "ObjectType", + "CollectionType", + "DatasetType", + ]: + # ! add here logic of subClassOf connecting with PROV-O or BFO + # ! maybe via classes instead of ObjectType/CollectionType/DatasetType? + # ! Example: + # ! graph.add((entity_uri, RDFS.subClassOf, "http://www.w3.org/ns/prov#Entity")) + continue parent_uri = namespace[parent_class.__name__] graph.add((entity_uri, RDFS.subClassOf, parent_uri)) From fbe9ba39b5329b8b30cb183073ba24a5b5186a15 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Fri, 24 Jan 2025 13:05:53 +0100 Subject: [PATCH 08/15] Adding back the placeholders for object, collections and datasets Changed OWL.Class to OWL.Thing Added descriptions for relationships --- bam_masterdata/cli/entities_to_rdf.py | 41 +++++++++++++++++++++------ bam_masterdata/metadata/entities.py | 22 +++++++------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index 3ec514b..94d41d7 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -80,30 +80,50 @@ def rdf_graph_init(g: "Graph") -> None: bam_props_uri = { BAM["hasMandatoryProperty"]: [ (RDF.type, OWL.ObjectProperty), - (RDFS.domain, BAM.ObjectType), + # (RDFS.domain, OWL.Class), (RDFS.range, BAM.PropertyType), (RDFS.label, Literal("hasMandatoryProperty", lang="en")), + ( + RDFS.comment, + Literal( + "The property must be mandatorily filled when creating the object in openBIS.", + lang="en", + ), + ), ], BAM["hasOptionalProperty"]: [ (RDF.type, OWL.ObjectProperty), - (RDFS.domain, BAM.ObjectType), + # (RDFS.domain, OWL.Class), (RDFS.range, BAM.PropertyType), (RDFS.label, Literal("hasOptionalProperty", lang="en")), + ( + RDFS.comment, + Literal( + "The property is optionally filled when creating the object in openBIS.", + lang="en", + ), + ), ], BAM["referenceTo"]: [ (RDF.type, OWL.ObjectProperty), (RDFS.domain, BAM.PropertyType), # Restricting domain to PropertyType - (RDFS.range, BAM.ObjectType), # Explicitly setting range to ObjectType + # (RDFS.range, OWL.Class), # Explicitly setting range to ObjectType (RDFS.label, Literal("referenceTo", lang="en")), + ( + RDFS.comment, + Literal( + "The property is referencing an object existing in openBIS.", + lang="en", + ), + ), ], } for prop_uri, obj_properties in bam_props_uri.items(): for prop in obj_properties: # type: ignore g.add((prop_uri, prop[0], prop[1])) # type: ignore - # Adding base PropertyType object as a placeholder for all properties - prop_uri = BAM.PropertyType - g.add((prop_uri, RDF.type, OWL.Class)) + # Adding base PropertyType and other objects as placeholders + # ! add only PropertyType prop_type_description = """A conceptual placeholder used to define and organize properties as first-class entities. PropertyType is used to place properties and define their metadata, separating properties from the entities they describe. @@ -112,7 +132,12 @@ def rdf_graph_init(g: "Graph") -> None: - PropertyType can align with `BFO:Quality` for inherent attributes. - PropertyType can represent `BFO:Role` if properties serve functional purposes. - PropertyType can be treated as a `prov:Entity` when properties participate in provenance relationships.""" - g.add((prop_uri, RDFS.comment, Literal(prop_type_description, lang="en"))) + for entity in ["PropertyType", "ObjectType", "CollectionType", "DatasetType"]: + entity_uri = BAM[entity] + g.add((entity_uri, RDF.type, OWL.Thing)) + g.add((entity_uri, RDFS.label, Literal(entity, lang="en"))) + if entity == "PropertyType": + g.add((entity_uri, RDFS.comment, Literal(prop_type_description, lang="en"))) def entities_to_rdf( @@ -136,7 +161,7 @@ def entities_to_rdf( prop_uri = BAM[obj.id] # Define the property as an OWL class inheriting from PropertyType - graph.add((prop_uri, RDF.type, OWL.Class)) + graph.add((prop_uri, RDF.type, OWL.Thing)) graph.add((prop_uri, RDFS.subClassOf, BAM.PropertyType)) # Add attributes like id, code, description in English and Deutsch, property_label, data_type diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index e8e92e0..d58f7d7 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -96,20 +96,20 @@ def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: entity_uri = namespace[self.defs.id] # Define the entity as an OWL class inheriting from the specific namespace type - graph.add((entity_uri, RDF.type, OWL.Class)) + graph.add((entity_uri, RDF.type, OWL.Thing)) parent_classes = self.__class__.__bases__ for parent_class in parent_classes: if issubclass(parent_class, BaseEntity) and parent_class != BaseEntity: - if parent_class.__name__ in [ - "ObjectType", - "CollectionType", - "DatasetType", - ]: - # ! add here logic of subClassOf connecting with PROV-O or BFO - # ! maybe via classes instead of ObjectType/CollectionType/DatasetType? - # ! Example: - # ! graph.add((entity_uri, RDFS.subClassOf, "http://www.w3.org/ns/prov#Entity")) - continue + # if parent_class.__name__ in [ + # "ObjectType", + # "CollectionType", + # "DatasetType", + # ]: + # # ! add here logic of subClassOf connecting with PROV-O or BFO + # # ! maybe via classes instead of ObjectType/CollectionType/DatasetType? + # # ! Example: + # # ! graph.add((entity_uri, RDFS.subClassOf, "http://www.w3.org/ns/prov#Entity")) + # continue parent_uri = namespace[parent_class.__name__] graph.add((entity_uri, RDFS.subClassOf, parent_uri)) From a82d839db317c2547a2495d5491f612f47cf074b Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 30 Jan 2025 13:20:44 +0100 Subject: [PATCH 09/15] Renamed model_to_rdf for BaseEntity --- bam_masterdata/cli/cli.py | 2 +- bam_masterdata/cli/entities_to_rdf.py | 6 +++--- bam_masterdata/metadata/entities.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py index b9d1535..1678715 100644 --- a/bam_masterdata/cli/cli.py +++ b/bam_masterdata/cli/cli.py @@ -277,7 +277,7 @@ def export_to_rdf(force_delete, python_path): module for module in py_modules if "vocabulary_types.py" not in module ] - # Process each module using the `to_json` method of each entity + # Process each module using the `model_to_rdf` method of each entity graph = Graph() for module_path in py_modules: entities_to_rdf(graph=graph, module_path=module_path, logger=logger) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index 94d41d7..2803531 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -202,12 +202,12 @@ def entities_to_rdf( # ? For OBJECT TYPES # ? `generated_code_prefix`, `auto_generated_codes`? for name, obj in inspect.getmembers(module, inspect.isclass): - # Ensure the class has the `to_json` method - if not hasattr(obj, "defs") or not callable(getattr(obj, "to_rdf")): + # Ensure the class has the `model_to_rdf` method + if not hasattr(obj, "defs") or not callable(getattr(obj, "model_to_rdf")): continue try: # Instantiate the class and call the method entity = obj() - entity.to_rdf(namespace=BAM, graph=graph) + entity.model_to_rdf(namespace=BAM, graph=graph) except Exception as err: click.echo(f"Failed to process class {name} in {module_path}: {err}") diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index d58f7d7..5943a33 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -92,7 +92,7 @@ def model_to_dict(self) -> dict: # ? For OBJECT TYPES # ? `generated_code_prefix`, `auto_generated_codes`? @no_type_check - def to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: + def model_to_rdf(self, namespace: "Namespace", graph: "Graph") -> None: entity_uri = namespace[self.defs.id] # Define the entity as an OWL class inheriting from the specific namespace type From 02ab05e95f0174f20e1e9e393af8ade4f1369458 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 30 Jan 2025 13:36:42 +0100 Subject: [PATCH 10/15] Fixed code_to_class_name when the code does not exist --- bam_masterdata/cli/entities_to_rdf.py | 7 ++++++- bam_masterdata/cli/fill_masterdata.py | 7 +++++-- bam_masterdata/utils/utils.py | 27 +++++++++++++++++++++------ tests/utils/test_utils.py | 5 ++++- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index 2803531..dfc384b 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -180,7 +180,12 @@ def entities_to_rdf( if obj.data_type.value == "OBJECT": # entity_ref_uri = BAM[code_to_class_name(obj.object_code)] # graph.add((prop_uri, BAM.referenceTo, entity_ref_uri)) - entity_ref_uri = BAM[code_to_class_name(obj.object_code)] + if not code_to_class_name(obj.object_code, logger): + logger.error( + f"Failed to identify the `object_code` for the property {obj.id}" + ) + continue + entity_ref_uri = BAM[code_to_class_name(obj.object_code, logger)] # Create a restriction with referenceTo restriction = BNode() diff --git a/bam_masterdata/cli/fill_masterdata.py b/bam_masterdata/cli/fill_masterdata.py index 05ac9e7..3b98586 100644 --- a/bam_masterdata/cli/fill_masterdata.py +++ b/bam_masterdata/cli/fill_masterdata.py @@ -55,7 +55,7 @@ class will inherit from `parent_class`. class_names (dict): A dictionary with the class names of the entities. default (str): The default parent class if the parent class does not exist. lines (list): A list of strings to be printed to the Python module. - + logger (BoundLoggerLazyProxy): The logger to log messages. Returns: tuple: The parent code, parent class, and class name of the entity. """ @@ -138,6 +138,9 @@ def generate_property_types(self) -> str: Generate Python code for the property types in the Openbis datamodel. The code is generated as a string which is then printed out to the specific Python module in `bam_masterdata/datamodel/property_types.py`. + Args: + logger (BoundLoggerLazyProxy): The logger to log messages. + Returns: str: Python code for the property types. """ @@ -154,7 +157,7 @@ def generate_property_types(self) -> str: continue # Format class name - class_name = code_to_class_name(code, entity_type="property") + class_name = code_to_class_name(code=code, entity_type="property") # Add class definition lines.append(f"{class_name} = PropertyTypeDef(") diff --git a/bam_masterdata/utils/utils.py b/bam_masterdata/utils/utils.py index d3c1d49..61dcff7 100644 --- a/bam_masterdata/utils/utils.py +++ b/bam_masterdata/utils/utils.py @@ -4,21 +4,25 @@ import os import shutil from itertools import chain -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional + +from bam_masterdata.logger import logger if TYPE_CHECKING: from structlog._config import BoundLoggerLazyProxy def delete_and_create_dir( - directory_path: str, logger: "BoundLoggerLazyProxy", force_delete: bool = False + directory_path: str, + logger: "BoundLoggerLazyProxy" = logger, + force_delete: bool = False, ) -> None: """ Deletes the directory at `directory_path` and creates a new one in the same path. Args: directory_path (str): The directory path to delete and create the folder. - logger (BoundLoggerLazyProxy): The logger to log messages.. + logger (BoundLoggerLazyProxy): The logger to log messages. Default is `logger`. force_delete (bool): If True, the directory will be forcibly deleted if it exists. """ if not directory_path: @@ -45,7 +49,7 @@ def delete_and_create_dir( def listdir_py_modules( - directory_path: str, logger: "BoundLoggerLazyProxy" + directory_path: str, logger: "BoundLoggerLazyProxy" = logger ) -> list[str]: """ Recursively goes through the `directory_path` and returns a list of all .py files that do not start with '_'. If @@ -53,7 +57,7 @@ def listdir_py_modules( Args: directory_path (str): The directory path to search through. - logger (BoundLoggerLazyProxy): The logger to log messages. + logger (BoundLoggerLazyProxy): The logger to log messages. Default is `logger`. Returns: list[str]: A list of all .py files that do not start with '_' @@ -96,7 +100,11 @@ def import_module(module_path: str) -> Any: return module -def code_to_class_name(code: str, entity_type: str = "object") -> str: +def code_to_class_name( + code: Optional[str], + logger: "BoundLoggerLazyProxy" = logger, + entity_type: str = "object", +) -> str: """ Converts an openBIS `code` to a class name by capitalizing each word and removing special characters. In the special case the entity is a property type, it retains the full name separated by points instead of @@ -104,10 +112,17 @@ def code_to_class_name(code: str, entity_type: str = "object") -> str: Args: code (str): The openBIS code to convert to a class name. + logger (BoundLoggerLazyProxy): The logger to log messages. Default is `logger`. entity_type (str): The type of entity to convert. Default is "object". Returns: str: The class name derived from the openBIS code. """ + if not code: + logger.error( + "The `code` is empty. Please, provide a proper input to the function." + ) + return "" + if entity_type == "property": code_names = chain.from_iterable( [c.split("_") for c in code.lstrip("$").split(".")] diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index a11dfe2..17965c7 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -119,6 +119,9 @@ def test_import_module(): @pytest.mark.parametrize( "code, entity_type, result", [ + # No code + (None, "object", ""), + ("", "object", ""), # for entities which are objects # normal code ("NORMAL", "object", "Normal"), @@ -156,7 +159,7 @@ def test_import_module(): ], ) def test_code_to_class_name(code: str, entity_type: str, result: str): - assert code_to_class_name(code, entity_type) == result + assert code_to_class_name(code, logger, entity_type) == result @pytest.mark.parametrize( From 66cd1b43c6187fa4c23c21c536db25af0c1cef7f Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 30 Jan 2025 14:01:21 +0100 Subject: [PATCH 11/15] Added docstrings to entities_to_rdf.py --- bam_masterdata/cli/entities_to_rdf.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/bam_masterdata/cli/entities_to_rdf.py b/bam_masterdata/cli/entities_to_rdf.py index dfc384b..0f953bc 100644 --- a/bam_masterdata/cli/entities_to_rdf.py +++ b/bam_masterdata/cli/entities_to_rdf.py @@ -16,6 +16,14 @@ def rdf_graph_init(g: "Graph") -> None: + """ + Initialize the RDF graph with base namespaces, annotation properties, and internal BAM properties. This + function also creates placeholders for PropertyType and other entity types. The graph is to be printed out + in RDF/XML format in the `entities_to_rdf` function. + + Args: + g (Graph): The RDF graph to be initialized. + """ # Adding base namespaces g.bind("dc", DC) g.bind("owl", OWL) @@ -143,6 +151,16 @@ def rdf_graph_init(g: "Graph") -> None: def entities_to_rdf( graph: "Graph", module_path: str, logger: "BoundLoggerLazyProxy" ) -> None: + """ + Convert the entities defined in the specified module to RDF triples and add them to the graph. The function + uses the `model_to_rdf` method defined in each class to convert the class attributes to RDF triples. The + function also adds the PropertyType and other entity types as placeholders in the graph. + + Args: + graph (Graph): The RDF graph to which the entities are added. + module_path (str): The path to the module containing the entities to be converted. + logger (BoundLoggerLazyProxy): The logger to log messages. + """ rdf_graph_init(graph) module = import_module(module_path=module_path) From c6f078d254ee0a3fcf465d97eb8e4049d8592a60 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 30 Jan 2025 17:00:37 +0100 Subject: [PATCH 12/15] Fix duplicated property problems Added duplicated_property_types function in CLI --- bam_masterdata/cli/cli.py | 19 +++++++++++ .../cli/duplicated_property_types.py | 34 +++++++++++++++++++ bam_masterdata/datamodel/property_types.py | 6 ++-- bam_masterdata/metadata/definitions.py | 5 ++- 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 bam_masterdata/cli/duplicated_property_types.py diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py index 1678715..0bb0d5a 100644 --- a/bam_masterdata/cli/cli.py +++ b/bam_masterdata/cli/cli.py @@ -8,6 +8,7 @@ from openpyxl import Workbook from rdflib import Graph +from bam_masterdata.cli.duplicated_property_types import duplicated_property_types from bam_masterdata.cli.entities_to_excel import entities_to_excel from bam_masterdata.cli.entities_to_json import entities_to_json from bam_masterdata.cli.entities_to_rdf import entities_to_rdf @@ -166,6 +167,12 @@ def export_to_json(force_delete, python_path): # Process each module using the `model_to_json` method of each entity for module_path in py_modules: + if module_path.endswith("property_types.py"): + if duplicated_property_types(module_path=module_path, logger=logger): + click.echo( + "Please fix the duplicated property types before exporting to RDF/XML." + ) + return entities_to_json(module_path=module_path, export_dir=export_dir, logger=logger) click.echo(f"All entity artifacts have been generated and saved to {export_dir}") @@ -216,6 +223,12 @@ def export_to_excel(force_delete, python_path): masterdata_file = os.path.join(export_dir, "masterdata.xlsx") wb = Workbook() for i, module_path in enumerate(py_modules): + if module_path.endswith("property_types.py"): + if duplicated_property_types(module_path=module_path, logger=logger): + click.echo( + "Please fix the duplicated property types before exporting to RDF/XML." + ) + return if i == 0: ws = wb.active else: @@ -280,6 +293,12 @@ def export_to_rdf(force_delete, python_path): # Process each module using the `model_to_rdf` method of each entity graph = Graph() for module_path in py_modules: + if module_path.endswith("property_types.py"): + if duplicated_property_types(module_path=module_path, logger=logger): + click.echo( + "Please fix the duplicated property types before exporting to RDF/XML." + ) + return entities_to_rdf(graph=graph, module_path=module_path, logger=logger) # Saving RDF/XML to file diff --git a/bam_masterdata/cli/duplicated_property_types.py b/bam_masterdata/cli/duplicated_property_types.py new file mode 100644 index 0000000..f013033 --- /dev/null +++ b/bam_masterdata/cli/duplicated_property_types.py @@ -0,0 +1,34 @@ +import inspect +import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from structlog._config import BoundLoggerLazyProxy + +from bam_masterdata.utils import import_module + + +def duplicated_property_types(module_path: str, logger: "BoundLoggerLazyProxy") -> dict: + duplicated_props: dict = {} + module = import_module(module_path=module_path) + source_code = inspect.getsource(module) + for name, _ in inspect.getmembers(module): + if name.startswith("_") or name == "PropertyTypeDef": + continue + + pattern = rf"^\s*{name} *= *PropertyTypeDef" + + # Find all matching line numbers + matches = [ + i + 1 # Convert to 1-based index + for i, line in enumerate(source_code.splitlines()) + if re.match(pattern, line) + ] + if len(matches) > 1: + duplicated_props[name] = matches + if duplicated_props: + logger.critical( + f"Found {len(duplicated_props)} duplicated property types. These are stored in a dictionary " + f"where the keys are the names of the variables in property_types.py and the values are the lines in the module: {duplicated_props}" + ) + return duplicated_props diff --git a/bam_masterdata/datamodel/property_types.py b/bam_masterdata/datamodel/property_types.py index 3762054..3510acc 100644 --- a/bam_masterdata/datamodel/property_types.py +++ b/bam_masterdata/datamodel/property_types.py @@ -3259,7 +3259,8 @@ ) -ProductCategory = PropertyTypeDef( +# ! Duplicated variable name for the property type definition (manually fixed) +ProductCategory1 = PropertyTypeDef( code="PRODUCT_CATEGORY", description="""Product Category (corresponds to field `Product Category` in the Hazardous Materials Inventory (GSM) of BAM)//Produktkategorie (entspricht Feld `Verwendungstypen/Produktkategorie` aus dem Gefahrstoffmanagement (GSM) der BAM))""", data_type="CONTROLLEDVOCABULARY", @@ -5897,7 +5898,8 @@ ) -ProductCategory = PropertyTypeDef( +# ! Duplicated variable name for the property type definition (manually fixed) +ProductCategory2 = PropertyTypeDef( code="PRODUCT.CATEGORY", description="""Category""", data_type="VARCHAR", diff --git a/bam_masterdata/metadata/definitions.py b/bam_masterdata/metadata/definitions.py index 6496a78..e5a4494 100644 --- a/bam_masterdata/metadata/definitions.py +++ b/bam_masterdata/metadata/definitions.py @@ -143,7 +143,10 @@ def model_id(cls, data: Any) -> Any: Returns: Any: The data with the validated fields. """ - data.id = code_to_class_name(data.code) + if "PropertyType" in data.name: + data.id = code_to_class_name(code=data.code, entity_type="property") + else: + data.id = code_to_class_name(code=data.code, entity_type="object") return data From aa69f61ec1461c6ce55670274d5558252da7ae2f Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 6 Feb 2025 15:41:36 +0100 Subject: [PATCH 13/15] Moved duplicated_property_types to utils and added tests --- bam_masterdata/cli/cli.py | 2 +- .../cli/duplicated_property_types.py | 34 --------------- bam_masterdata/utils/__init__.py | 1 + bam_masterdata/utils/utils.py | 43 +++++++++++++++++++ tests/data/utils/example_prop_types_1.py | 23 ++++++++++ tests/data/utils/example_prop_types_2.py | 16 +++++++ tests/utils/test_utils.py | 24 ++++++++++- 7 files changed, 107 insertions(+), 36 deletions(-) delete mode 100644 bam_masterdata/cli/duplicated_property_types.py create mode 100644 tests/data/utils/example_prop_types_1.py create mode 100644 tests/data/utils/example_prop_types_2.py diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py index 0bb0d5a..f4e6e80 100644 --- a/bam_masterdata/cli/cli.py +++ b/bam_masterdata/cli/cli.py @@ -8,7 +8,6 @@ from openpyxl import Workbook from rdflib import Graph -from bam_masterdata.cli.duplicated_property_types import duplicated_property_types from bam_masterdata.cli.entities_to_excel import entities_to_excel from bam_masterdata.cli.entities_to_json import entities_to_json from bam_masterdata.cli.entities_to_rdf import entities_to_rdf @@ -16,6 +15,7 @@ from bam_masterdata.logger import logger from bam_masterdata.utils import ( delete_and_create_dir, + duplicated_property_types, import_module, listdir_py_modules, ) diff --git a/bam_masterdata/cli/duplicated_property_types.py b/bam_masterdata/cli/duplicated_property_types.py deleted file mode 100644 index f013033..0000000 --- a/bam_masterdata/cli/duplicated_property_types.py +++ /dev/null @@ -1,34 +0,0 @@ -import inspect -import re -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from structlog._config import BoundLoggerLazyProxy - -from bam_masterdata.utils import import_module - - -def duplicated_property_types(module_path: str, logger: "BoundLoggerLazyProxy") -> dict: - duplicated_props: dict = {} - module = import_module(module_path=module_path) - source_code = inspect.getsource(module) - for name, _ in inspect.getmembers(module): - if name.startswith("_") or name == "PropertyTypeDef": - continue - - pattern = rf"^\s*{name} *= *PropertyTypeDef" - - # Find all matching line numbers - matches = [ - i + 1 # Convert to 1-based index - for i, line in enumerate(source_code.splitlines()) - if re.match(pattern, line) - ] - if len(matches) > 1: - duplicated_props[name] = matches - if duplicated_props: - logger.critical( - f"Found {len(duplicated_props)} duplicated property types. These are stored in a dictionary " - f"where the keys are the names of the variables in property_types.py and the values are the lines in the module: {duplicated_props}" - ) - return duplicated_props diff --git a/bam_masterdata/utils/__init__.py b/bam_masterdata/utils/__init__.py index 6f16e11..286b484 100644 --- a/bam_masterdata/utils/__init__.py +++ b/bam_masterdata/utils/__init__.py @@ -1,6 +1,7 @@ from .utils import ( code_to_class_name, delete_and_create_dir, + duplicated_property_types, import_module, listdir_py_modules, load_validation_rules, diff --git a/bam_masterdata/utils/utils.py b/bam_masterdata/utils/utils.py index 61dcff7..5139a12 100644 --- a/bam_masterdata/utils/utils.py +++ b/bam_masterdata/utils/utils.py @@ -1,7 +1,9 @@ import glob import importlib.util +import inspect import json import os +import re import shutil from itertools import chain from typing import TYPE_CHECKING, Any, Optional @@ -160,3 +162,44 @@ def load_validation_rules( except json.JSONDecodeError as e: logger.error(f"Error parsing validation rules JSON: {e}") raise ValueError(f"Error parsing validation rules JSON: {e}") + + +from pathlib import Path + + +def duplicated_property_types(module_path: str, logger: "BoundLoggerLazyProxy") -> dict: + """ + Find the duplicated property types in a module specified by `module_path` and returns a dictionary + containing the duplicated property types class names as keys and the lines where they matched as values. + + Args: + module_path (str): The path to the module containing the property types. + logger (BoundLoggerLazyProxy): The logger to log messages. + + Returns: + dict: A dictionary containing the duplicated property types class names as keys and the + lines where they matched as values. + """ + duplicated_props: dict = {} + module = import_module(module_path=module_path) + source_code = inspect.getsource(module) + for name, _ in inspect.getmembers(module): + if name.startswith("_") or name == "PropertyTypeDef": + continue + + pattern = rf"^\s*{name} *= *PropertyTypeDef" + + # Find all matching line numbers + matches = [ + i + 1 # Convert to 1-based index + for i, line in enumerate(source_code.splitlines()) + if re.match(pattern, line) + ] + if len(matches) > 1: + duplicated_props[name] = matches + if duplicated_props: + logger.critical( + f"Found {len(duplicated_props)} duplicated property types. These are stored in a dictionary " + f"where the keys are the names of the variables in property_types.py and the values are the lines in the module: {duplicated_props}" + ) + return duplicated_props diff --git a/tests/data/utils/example_prop_types_1.py b/tests/data/utils/example_prop_types_1.py new file mode 100644 index 0000000..af69407 --- /dev/null +++ b/tests/data/utils/example_prop_types_1.py @@ -0,0 +1,23 @@ +from bam_masterdata.metadata.definitions import PropertyTypeDef + +PropA = PropertyTypeDef( + code="PROPA", + description="""repeated property""", + data_type="VARCHAR", + property_label="A1", +) + + +PropB = PropertyTypeDef( + code="PROPB", + description="""non-repeated property""", + data_type="VARCHAR", + property_label="B", +) + +PropA = PropertyTypeDef( + code="PROPA", + description="""repeated property""", + data_type="VARCHAR", + property_label="A2", +) diff --git a/tests/data/utils/example_prop_types_2.py b/tests/data/utils/example_prop_types_2.py new file mode 100644 index 0000000..c8c98be --- /dev/null +++ b/tests/data/utils/example_prop_types_2.py @@ -0,0 +1,16 @@ +from bam_masterdata.metadata.definitions import PropertyTypeDef + +PropA = PropertyTypeDef( + code="PROPA", + description="""non-repeated property""", + data_type="VARCHAR", + property_label="A", +) + + +PropB = PropertyTypeDef( + code="PROPB", + description="""non-repeated property""", + data_type="VARCHAR", + property_label="B", +) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 17965c7..30cde06 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -10,6 +10,7 @@ from bam_masterdata.utils import ( code_to_class_name, delete_and_create_dir, + duplicated_property_types, import_module, listdir_py_modules, load_validation_rules, @@ -62,7 +63,12 @@ def test_delete_and_create_dir( "warning", ), # No Python files found in the directory - ("./tests/data", [], "No Python files found in the directory.", "info"), + ( + "./tests/data/empty", + [], + "No Python files found in the directory.", + "info", + ), # Python files found in the directory ( "./tests/utils", @@ -270,3 +276,19 @@ def test_load_validation_rules( assert result == expected_output assert cleared_log_storage[-1]["event"] == expected_log assert cleared_log_storage[-1]["level"] == "info" + + +@pytest.mark.parametrize( + "path, result", + [ + # PropA appears twice + ("tests/data/utils/example_prop_types_1.py", {"PropA": [3, 18]}), + # None duplicated + ("tests/data/utils/example_prop_types_2.py", {}), + ], +) +def test_duplicated_property_types(cleared_log_storage: list, path: str, result: dict): + assert result == duplicated_property_types(path, logger) + if result: + assert cleared_log_storage[0]["level"] == "critical" + assert "Found 1 duplicated property types" in cleared_log_storage[0]["event"] From 11383d6c981255af41fda3de9edc2d211ea54d72 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 6 Feb 2025 16:22:48 +0100 Subject: [PATCH 14/15] Added testing for entities_to_rdf --- tests/cli/test_entities_to_rdf.py | 114 ++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 tests/cli/test_entities_to_rdf.py diff --git a/tests/cli/test_entities_to_rdf.py b/tests/cli/test_entities_to_rdf.py new file mode 100644 index 0000000..73e6e3d --- /dev/null +++ b/tests/cli/test_entities_to_rdf.py @@ -0,0 +1,114 @@ +import json +import os +import shutil + +import pytest +from rdflib import BNode, Graph, Literal, Namespace, URIRef +from rdflib.namespace import DC, OWL, RDF, RDFS + +from bam_masterdata.cli.entities_to_rdf import BAM, entities_to_rdf, rdf_graph_init +from bam_masterdata.logger import logger + + +def test_rdf_init(): + """ + Test the `rdf_graph_init` function. + """ + graph = Graph() + rdf_graph_init(graph) + + # Test how many nodes initialize in the graph + assert len(graph) == 30 + + # Check if base namespaces are bound correctly. + expected_namespaces = {"dc", "owl", "rdf", "rdfs", "bam", "prov"} + bound_namespaces = {prefix for prefix, _ in graph.namespaces()} + expected_namespaces.issubset(bound_namespaces) + + # Ensure standard annotation properties exist with correct types. + annotation_props = [RDFS.label, RDFS.comment, DC.identifier] + for prop in annotation_props: + assert (prop, RDF.type, OWL.AnnotationProperty) in graph + + # Verify bam:dataType and bam:propertyLabel exist with labels and comments. + custom_props = { + BAM["dataType"]: "Represents the data type of a property", + BAM["propertyLabel"]: "A UI-specific annotation used in openBIS", + } + for prop, comment_start in custom_props.items(): + assert (prop, RDF.type, OWL.AnnotationProperty) in graph + assert ( + prop, + RDFS.label, + Literal(f"bam:{prop.split('/')[-1]}", lang="en"), + ) in graph + assert any( + o.startswith(comment_start) + for _, _, o in graph.triples((prop, RDFS.comment, None)) + ) + + # Check that BAM object properties exist and have correct characteristics. + bam_props = { + BAM["hasMandatoryProperty"]: "The property must be mandatorily filled", + BAM["hasOptionalProperty"]: "The property is optionally filled", + BAM["referenceTo"]: "The property is referencing an object", + } + for prop, comment_start in bam_props.items(): + assert (prop, RDF.type, OWL.ObjectProperty) in graph + assert any( + o.startswith(comment_start) + for _, _, o in graph.triples((prop, RDFS.comment, None)) + ) + + # Ensure PropertyType and related objects exist with labels and comments. + prop_type_uri = BAM["PropertyType"] + assert (prop_type_uri, RDF.type, OWL.Thing) in graph + assert (prop_type_uri, RDFS.label, Literal("PropertyType", lang="en")) in graph + assert any( + o.startswith("A conceptual placeholder used to define") + for _, _, o in graph.triples((prop_type_uri, RDFS.comment, None)) + ) + + +def test_entities_to_rdf(): + module_name = "object_types" # ! only one module for testing + module_path = os.path.join("./bam_masterdata/datamodel", f"{module_name}.py") + + graph = Graph() + rdf_graph_init(graph) + entities_to_rdf(graph=graph, module_path=module_path, logger=logger) + + # Testing + # ! this number is subject to change as the datamodel evolves + assert len(graph) == 5794 + + # Check Instrument entity + instrument_uri = BAM["Instrument"] + assert (instrument_uri, RDF.type, OWL.Thing) in graph + assert (instrument_uri, RDFS.label, Literal("Instrument", lang="en")) in graph + assert ( + instrument_uri, + RDFS.comment, + Literal("Measuring Instrument", lang="en"), + ) in graph + assert ( + instrument_uri, + RDFS.comment, + Literal("Messgerät", lang="de"), + ) in graph + + # Check Camera entity (subclass of Instrument) + camera_uri = BAM["Camera"] + assert (camera_uri, RDF.type, OWL.Thing) in graph + assert (camera_uri, RDFS.subClassOf, instrument_uri) in graph + assert (camera_uri, RDFS.label, Literal("Camera", lang="en")) in graph + assert ( + camera_uri, + RDFS.comment, + Literal("A generic camera device for recording video or photos", lang="en"), + ) in graph + assert ( + camera_uri, + RDFS.comment, + Literal("Eine generische Kamera für Video- oder Fotoaufnahmen", lang="de"), + ) in graph From 8c342d3732a943aa0fcb51fb69c11070795cfce1 Mon Sep 17 00:00:00 2001 From: jpizarro Date: Thu, 6 Feb 2025 16:23:04 +0100 Subject: [PATCH 15/15] Fix imports --- tests/cli/test_entities_to_rdf.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/cli/test_entities_to_rdf.py b/tests/cli/test_entities_to_rdf.py index 73e6e3d..8a6e08b 100644 --- a/tests/cli/test_entities_to_rdf.py +++ b/tests/cli/test_entities_to_rdf.py @@ -1,9 +1,6 @@ -import json import os -import shutil -import pytest -from rdflib import BNode, Graph, Literal, Namespace, URIRef +from rdflib import Graph, Literal from rdflib.namespace import DC, OWL, RDF, RDFS from bam_masterdata.cli.entities_to_rdf import BAM, entities_to_rdf, rdf_graph_init