diff --git a/src/dsp_tools/commands/validate_data/models/data_deserialised.py b/src/dsp_tools/commands/validate_data/models/data_deserialised.py index 5459d14e05..d1b0d6ce9c 100644 --- a/src/dsp_tools/commands/validate_data/models/data_deserialised.py +++ b/src/dsp_tools/commands/validate_data/models/data_deserialised.py @@ -3,6 +3,15 @@ from abc import ABC from dataclasses import dataclass +from lxml import etree + + +@dataclass +class XMLProject: + shortcode: str + root: etree._Element + used_ontologies: set[str] + @dataclass class ProjectDeserialised: diff --git a/src/dsp_tools/commands/validate_data/validate_data.py b/src/dsp_tools/commands/validate_data/validate_data.py index 544ea08066..0ed5ea92b6 100644 --- a/src/dsp_tools/commands/validate_data/validate_data.py +++ b/src/dsp_tools/commands/validate_data/validate_data.py @@ -11,17 +11,20 @@ from dsp_tools.commands.validate_data.deserialise_input import deserialise_xml from dsp_tools.commands.validate_data.make_data_rdf import make_data_rdf from dsp_tools.commands.validate_data.models.data_deserialised import ProjectDeserialised +from dsp_tools.commands.validate_data.models.data_deserialised import XMLProject from dsp_tools.commands.validate_data.models.data_rdf import DataRDF from dsp_tools.commands.validate_data.models.validation import RDFGraphs from dsp_tools.commands.validate_data.models.validation import ValidationReport from dsp_tools.commands.validate_data.reformat_validaton_result import reformat_validation_graph from dsp_tools.commands.validate_data.sparql.construct_shacl import construct_shapes_graph +from dsp_tools.models.exceptions import InputError from dsp_tools.utils.xml_utils import parse_xml_file from dsp_tools.utils.xml_utils import remove_comments_from_element_tree from dsp_tools.utils.xml_utils import transform_into_localnames from dsp_tools.utils.xml_validation import validate_xml LIST_SEPARATOR = "\n - " +KNORA_API = "http://api.knora.org/ontology/knora-api/v2#" def validate_data(filepath: Path, api_url: str, dev_route: bool, save_graphs: bool) -> bool: # noqa: ARG001 (unused argument) @@ -141,13 +144,13 @@ def _validate(validator: ShaclValidator, rdf_graphs: RDFGraphs) -> ValidationRep def _get_data_info_from_file(file: Path, api_url: str) -> tuple[DataRDF, str]: - cleaned_root = _parse_and_clean_file(file, api_url) - deserialised: ProjectDeserialised = deserialise_xml(cleaned_root) + xml_project = _parse_and_clean_file(file, api_url) + deserialised: ProjectDeserialised = deserialise_xml(xml_project.root) rdf_data: DataRDF = make_data_rdf(deserialised.data) return rdf_data, deserialised.info.shortcode -def _parse_and_clean_file(file: Path, api_url: str) -> etree._Element: +def _parse_and_clean_file(file: Path, api_url: str) -> XMLProject: root = parse_xml_file(file) root = remove_comments_from_element_tree(root) validate_xml(root) @@ -155,13 +158,40 @@ def _parse_and_clean_file(file: Path, api_url: str) -> etree._Element: return _replace_namespaces(root, api_url) -def _replace_namespaces(root: etree._Element, api_url: str) -> etree._Element: - with open("src/dsp_tools/resources/validate_data/replace_namespace.xslt", "rb") as xslt_file: - xslt_data = xslt_file.read() +def _replace_namespaces(root: etree._Element, api_url: str) -> XMLProject: + new_root = deepcopy(root) shortcode = root.attrib["shortcode"] default_ontology = root.attrib["default-ontology"] - namespace = f"{api_url}/ontology/{shortcode}/{default_ontology}/v2#" - xslt_root = etree.XML(xslt_data) - transform = etree.XSLT(xslt_root) - replacement_value = etree.XSLT.strparam(namespace) - return transform(root, replacementValue=replacement_value).getroot() + namespace_lookup = _make_namespace_lookup(api_url, shortcode, default_ontology) + for ele in new_root.iterdescendants(): + if (found := ele.attrib.get("restype")) or (found := ele.attrib.get("name")): + split_found = found.split(":") + if len(split_found) == 1: + ele.set("restype" if "restype" in ele.attrib else "name", f"{KNORA_API}{found}") + elif len(split_found) == 2: + if len(split_found[0]) == 0: + found_namespace = namespace_lookup[default_ontology] + elif not (namespace := namespace_lookup.get(split_found[0])): + found_namespace = _construct_namespace(api_url, shortcode, split_found[0]) + namespace_lookup[split_found[0]] = found_namespace + else: + found_namespace = namespace + ele.set("restype" if "restype" in ele.attrib else "name", f"{found_namespace}{split_found[1]}") + else: + raise InputError( + f"It is not permissible to have a colon in a property or resource class name. " + f"Please correct the following: {found}" + ) + return XMLProject( + shortcode=shortcode, + root=new_root, + used_ontologies=set(namespace_lookup.values()), + ) + + +def _make_namespace_lookup(api_url: str, shortcode: str, default_onto: str) -> dict[str, str]: + return {default_onto: _construct_namespace(api_url, shortcode, default_onto), "knora-api": KNORA_API} + + +def _construct_namespace(api_url: str, shortcode: str, onto_name: str) -> str: + return f"{api_url}/ontology/{shortcode}/{onto_name}/v2#" diff --git a/src/dsp_tools/resources/validate_data/replace_namespace.xslt b/src/dsp_tools/resources/validate_data/replace_namespace.xslt deleted file mode 100644 index c41c243c0f..0000000000 --- a/src/dsp_tools/resources/validate_data/replace_namespace.xslt +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/test/integration/commands/validate_data/test_reformat_input.py b/test/integration/commands/validate_data/test_reformat_input.py index c4e1749118..0c69708b36 100644 --- a/test/integration/commands/validate_data/test_reformat_input.py +++ b/test/integration/commands/validate_data/test_reformat_input.py @@ -10,7 +10,7 @@ def test_to_data_rdf(data_xml: etree._Element) -> None: assert isinstance(res, ProjectDeserialised) assert res.info.shortcode == "9999" assert res.info.default_onto == "onto" - assert len(res.data.resources) == 14 + assert len(res.data.resources) == 15 if __name__ == "__main__": diff --git a/test/integration/commands/validate_data/test_validate_data.py b/test/integration/commands/validate_data/test_validate_data.py index 711e601385..5a16e4c278 100644 --- a/test/integration/commands/validate_data/test_validate_data.py +++ b/test/integration/commands/validate_data/test_validate_data.py @@ -4,7 +4,10 @@ def test_to_data_rdf(data_xml: etree._Element) -> None: res_list = list(data_xml.iterdescendants(tag="resource")) all_types = {x.attrib["restype"] for x in res_list} - assert all_types == {"http://0.0.0.0:3333/ontology/9999/onto/v2#ClassWithEverything"} + assert all_types == { + "http://0.0.0.0:3333/ontology/9999/onto/v2#ClassWithEverything", + "http://0.0.0.0:3333/ontology/9999/second-onto/v2#SecondOntoClass", + } expected_names = { "http://0.0.0.0:3333/ontology/9999/onto/v2#testBoolean", "http://0.0.0.0:3333/ontology/9999/onto/v2#testColor", @@ -15,9 +18,11 @@ def test_to_data_rdf(data_xml: etree._Element) -> None: "http://0.0.0.0:3333/ontology/9999/onto/v2#testListProp", "http://0.0.0.0:3333/ontology/9999/onto/v2#testHasLinkTo", "http://0.0.0.0:3333/ontology/9999/onto/v2#testRichtext", + "http://0.0.0.0:3333/ontology/9999/onto/v2#testSimpleText", "http://0.0.0.0:3333/ontology/9999/onto/v2#testTextarea", "http://0.0.0.0:3333/ontology/9999/onto/v2#testTimeValue", "http://0.0.0.0:3333/ontology/9999/onto/v2#testUriValue", + "http://0.0.0.0:3333/ontology/9999/second-onto/v2#testBoolean", "http://api.knora.org/ontology/knora-api/v2#hasColor", "http://api.knora.org/ontology/knora-api/v2#isRegionOf", "http://api.knora.org/ontology/knora-api/v2#hasGeometry", diff --git a/test/integration/commands/validate_data/xml_fixtures.py b/test/integration/commands/validate_data/xml_fixtures.py index a3e5fa4743..293da7fac9 100644 --- a/test/integration/commands/validate_data/xml_fixtures.py +++ b/test/integration/commands/validate_data/xml_fixtures.py @@ -8,4 +8,4 @@ @pytest.fixture def data_xml() -> etree._Element: - return _parse_and_clean_file(Path("testdata/validate-data/data/minimal_correct.xml"), "http://0.0.0.0:3333") + return _parse_and_clean_file(Path("testdata/validate-data/data/minimal_correct.xml"), "http://0.0.0.0:3333").root diff --git a/testdata/validate-data/data/minimal_correct.xml b/testdata/validate-data/data/minimal_correct.xml index 5d84964061..0874750578 100644 --- a/testdata/validate-data/data/minimal_correct.xml +++ b/testdata/validate-data/data/minimal_correct.xml @@ -80,6 +80,15 @@ + + + true + + + Text + + + #5d1f1e diff --git a/testdata/validate-data/project.json b/testdata/validate-data/project.json index eb4ca0c006..e8c4e5b46b 100644 --- a/testdata/validate-data/project.json +++ b/testdata/validate-data/project.json @@ -505,6 +505,44 @@ ] } ] + }, + { + "name": "second-onto", + "label": "Second Ontology", + "properties": [ + { + "name": "testBoolean", + "super": [ + "hasValue" + ], + "object": "BooleanValue", + "labels": { + "en": "Test Boolean of the second onto" + }, + "gui_element": "Checkbox" + } + ], + "resources": [ + { + "name": "SecondOntoClass", + "super": "Resource", + "labels": { + "en": "Resource with all cardinality options" + }, + "cardinalities": [ + { + "propname": ":testBoolean", + "cardinality": "0-1", + "gui_order": 0 + }, + { + "propname": "onto:testSimpleText", + "cardinality": "0-n", + "gui_order": 1 + } + ] + } + ] } ] }