Skip to content

Commit

Permalink
fix(validate-data): enable usage of other ontologies besides the default ontology (DEV-4263) (#1235)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann authored Oct 22, 2024
1 parent 3dcba5d commit a2f7959
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from abc import ABC
from dataclasses import dataclass

from lxml import etree


@dataclass
class XMLProject:
    """
    Result of parsing and cleaning an XML data file.

    Attributes:
        shortcode: value of the `shortcode` attribute of the XML root
        root: the XML tree in which the resource class and property names were replaced by their full IRIs
        used_ontologies: namespaces that were collected while replacing the names
    """

    shortcode: str
    root: etree._Element
    used_ontologies: set[str]


@dataclass
class ProjectDeserialised:
Expand Down
52 changes: 41 additions & 11 deletions src/dsp_tools/commands/validate_data/validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,20 @@
from dsp_tools.commands.validate_data.deserialise_input import deserialise_xml
from dsp_tools.commands.validate_data.make_data_rdf import make_data_rdf
from dsp_tools.commands.validate_data.models.data_deserialised import ProjectDeserialised
from dsp_tools.commands.validate_data.models.data_deserialised import XMLProject
from dsp_tools.commands.validate_data.models.data_rdf import DataRDF
from dsp_tools.commands.validate_data.models.validation import RDFGraphs
from dsp_tools.commands.validate_data.models.validation import ValidationReport
from dsp_tools.commands.validate_data.reformat_validaton_result import reformat_validation_graph
from dsp_tools.commands.validate_data.sparql.construct_shacl import construct_shapes_graph
from dsp_tools.models.exceptions import InputError
from dsp_tools.utils.xml_utils import parse_xml_file
from dsp_tools.utils.xml_utils import remove_comments_from_element_tree
from dsp_tools.utils.xml_utils import transform_into_localnames
from dsp_tools.utils.xml_validation import validate_xml

# Separator that puts each list item on a new line, prefixed with " - ".
# NOTE(review): its usage lies outside the visible part of the file; presumably for user-facing messages, confirm.
LIST_SEPARATOR = "\n - "
# Namespace of the knora-api ontology.
# It is prepended to resource class and property names that are written without any prefix.
KNORA_API = "http://api.knora.org/ontology/knora-api/v2#"


def validate_data(filepath: Path, api_url: str, dev_route: bool, save_graphs: bool) -> bool: # noqa: ARG001 (unused argument)
Expand Down Expand Up @@ -141,27 +144,54 @@ def _validate(validator: ShaclValidator, rdf_graphs: RDFGraphs) -> ValidationRep


def _get_data_info_from_file(file: Path, api_url: str) -> tuple[DataRDF, str]:
    """
    Parse the XML data file and convert its content into RDF data.

    Args:
        file: path to the XML data file
        api_url: URL of the API, it is used to construct the namespaces of the project ontologies

    Returns:
        The data as RDF and the shortcode of the project
    """
    # The file is parsed exactly once; only the cleaned element tree is passed on for deserialisation.
    xml_project = _parse_and_clean_file(file, api_url)
    deserialised: ProjectDeserialised = deserialise_xml(xml_project.root)
    rdf_data: DataRDF = make_data_rdf(deserialised.data)
    return rdf_data, deserialised.info.shortcode


def _parse_and_clean_file(file: Path, api_url: str) -> XMLProject:
    """
    Parse the XML data file, validate it and replace the prefixed names with full IRIs.

    Args:
        file: path to the XML data file
        api_url: URL of the API, it is used to construct the namespaces of the project ontologies

    Returns:
        The shortcode, the cleaned XML tree and the namespaces collected during the replacement
    """
    root = parse_xml_file(file)
    root = remove_comments_from_element_tree(root)
    # The validation runs before the tags are reduced to their localnames.
    validate_xml(root)
    root = transform_into_localnames(root)
    return _replace_namespaces(root, api_url)


def _replace_namespaces(root: etree._Element, api_url: str) -> XMLProject:
    """
    Replace the resource class and property names with their full IRIs.

    The values of the `restype` and `name` attributes of all descendants are rewritten on a copy of the tree:

    - `name` (no colon): the name is prefixed with the knora-api namespace
    - `:name` (empty prefix): the name is prefixed with the namespace of the default ontology
    - `onto:name`: the name is prefixed with the namespace of the ontology `onto` of the same project

    Args:
        root: root element of the XML data, it is not modified
        api_url: URL of the API, it is used to construct the namespaces of the project ontologies

    Returns:
        The shortcode, the copy of the tree with the replaced names,
        and the namespaces of knora-api, the default ontology and every other ontology prefix found in the data

    Raises:
        InputError: if a name contains more than one colon
    """
    new_root = deepcopy(root)
    shortcode = root.attrib["shortcode"]
    default_ontology = root.attrib["default-ontology"]
    namespace_lookup = _make_namespace_lookup(api_url, shortcode, default_ontology)
    for ele in new_root.iterdescendants():
        # Remember which attribute the value was read from, so that the result is written back to the same one.
        # "restype" takes precedence, an absent or empty value falls through to "name".
        for attrib_name in ("restype", "name"):
            if found := ele.attrib.get(attrib_name):
                break
        else:
            continue
        split_found = found.split(":")
        if len(split_found) > 2:
            raise InputError(
                f"It is not permissible to have a colon in a property or resource class name. "
                f"Please correct the following: {found}"
            )
        if len(split_found) == 1:
            ele.set(attrib_name, f"{KNORA_API}{found}")
            continue
        prefix, local_name = split_found
        if not prefix:
            found_namespace = namespace_lookup[default_ontology]
        elif not (found_namespace := namespace_lookup.get(prefix)):
            # First occurrence of this ontology prefix: construct its namespace and cache it.
            found_namespace = _construct_namespace(api_url, shortcode, prefix)
            namespace_lookup[prefix] = found_namespace
        ele.set(attrib_name, f"{found_namespace}{local_name}")
    return XMLProject(
        shortcode=shortcode,
        root=new_root,
        used_ontologies=set(namespace_lookup.values()),
    )


def _make_namespace_lookup(api_url: str, shortcode: str, default_onto: str) -> dict[str, str]:
    """
    Create the initial mapping from ontology prefix to namespace.

    Args:
        api_url: URL of the API
        shortcode: shortcode of the project
        default_onto: name of the default ontology of the project

    Returns:
        A dictionary with the namespaces of the default ontology and of knora-api
    """
    lookup = {default_onto: _construct_namespace(api_url, shortcode, default_onto)}
    lookup["knora-api"] = KNORA_API
    return lookup


def _construct_namespace(api_url: str, shortcode: str, onto_name: str) -> str:
    """
    Construct the namespace of a project ontology.

    Args:
        api_url: URL of the API, with or without a trailing slash
        shortcode: shortcode of the project
        onto_name: name of the ontology

    Returns:
        The namespace in the form `<api_url>/ontology/<shortcode>/<onto_name>/v2#`
    """
    # A trailing slash on the URL would otherwise produce "//ontology/...".
    base_url = api_url.rstrip("/")
    return f"{base_url}/ontology/{shortcode}/{onto_name}/v2#"
61 changes: 0 additions & 61 deletions src/dsp_tools/resources/validate_data/replace_namespace.xslt

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_to_data_rdf(data_xml: etree._Element) -> None:
assert isinstance(res, ProjectDeserialised)
assert res.info.shortcode == "9999"
assert res.info.default_onto == "onto"
assert len(res.data.resources) == 14
assert len(res.data.resources) == 15


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
def test_to_data_rdf(data_xml: etree._Element) -> None:
res_list = list(data_xml.iterdescendants(tag="resource"))
all_types = {x.attrib["restype"] for x in res_list}
assert all_types == {"http://0.0.0.0:3333/ontology/9999/onto/v2#ClassWithEverything"}
assert all_types == {
"http://0.0.0.0:3333/ontology/9999/onto/v2#ClassWithEverything",
"http://0.0.0.0:3333/ontology/9999/second-onto/v2#SecondOntoClass",
}
expected_names = {
"http://0.0.0.0:3333/ontology/9999/onto/v2#testBoolean",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testColor",
Expand All @@ -15,9 +18,11 @@ def test_to_data_rdf(data_xml: etree._Element) -> None:
"http://0.0.0.0:3333/ontology/9999/onto/v2#testListProp",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testHasLinkTo",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testRichtext",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testSimpleText",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testTextarea",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testTimeValue",
"http://0.0.0.0:3333/ontology/9999/onto/v2#testUriValue",
"http://0.0.0.0:3333/ontology/9999/second-onto/v2#testBoolean",
"http://api.knora.org/ontology/knora-api/v2#hasColor",
"http://api.knora.org/ontology/knora-api/v2#isRegionOf",
"http://api.knora.org/ontology/knora-api/v2#hasGeometry",
Expand Down
2 changes: 1 addition & 1 deletion test/integration/commands/validate_data/xml_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@

@pytest.fixture
def data_xml() -> etree._Element:
    """Provide the cleaned root element of the test data file `minimal_correct.xml`."""
    xml_project = _parse_and_clean_file(Path("testdata/validate-data/data/minimal_correct.xml"), "http://0.0.0.0:3333")
    # The tests operate on the element tree, not on the surrounding XMLProject.
    return xml_project.root
9 changes: 9 additions & 0 deletions testdata/validate-data/data/minimal_correct.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@
</uri-prop>
</resource>

<resource label="Class of second ontology" restype="second-onto:SecondOntoClass" id="second_onto_class">
<boolean-prop name="second-onto:testBoolean">
<boolean>true</boolean>
</boolean-prop>
<text-prop name=":testSimpleText">
<text encoding="utf8">Text</text>
</text-prop>
</resource>

<region label="Region" id="region_1">
<color-prop name="hasColor">
<color permissions="prop-restricted">#5d1f1e</color>
Expand Down
38 changes: 38 additions & 0 deletions testdata/validate-data/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,44 @@
]
}
]
},
{
"name": "second-onto",
"label": "Second Ontology",
"properties": [
{
"name": "testBoolean",
"super": [
"hasValue"
],
"object": "BooleanValue",
"labels": {
"en": "Test Boolean of the second onto"
},
"gui_element": "Checkbox"
}
],
"resources": [
{
"name": "SecondOntoClass",
"super": "Resource",
"labels": {
"en": "Resource with all cardinality options"
},
"cardinalities": [
{
"propname": ":testBoolean",
"cardinality": "0-1",
"gui_order": 0
},
{
"propname": "onto:testSimpleText",
"cardinality": "0-n",
"gui_order": 1
}
]
}
]
}
]
}
Expand Down

0 comments on commit a2f7959

Please sign in to comment.